# Data Preprocessing
## Replace all the missing values with NA.

library(plotly)
package ‘plotly’ was built under R version 3.6.3
Loading required package: ggplot2
package ‘ggplot2’ was built under R version 3.6.3
Attaching package: ‘plotly’

The following object is masked from ‘package:ggplot2’:

    last_plot

The following object is masked from ‘package:stats’:

    filter

The following object is masked from ‘package:graphics’:

    layout

##Let’s take a look at the data structure

# Print each column's type together with a preview of its first values.
str(censusData)
'data.frame':   30162 obs. of  15 variables:
 $ age           : int  39 50 38 53 28 37 49 52 31 42 ...
 $ workclass     : Factor w/ 7 levels "Federal-gov",..: 6 5 3 3 3 3 3 5 3 3 ...
 $ fnlwgt        : int  77516 83311 215646 234721 338409 284582 160187 209642 45781 159449 ...
 $ education     : Factor w/ 16 levels "10th","11th",..: 10 10 12 2 10 13 7 12 13 10 ...
 $ education.num : int  13 13 9 7 13 14 5 9 14 13 ...
 $ marital.status: Factor w/ 7 levels "Divorced","Married-AF-spouse",..: 5 3 1 3 3 3 4 3 5 3 ...
 $ occupation    : Factor w/ 14 levels "Adm-clerical",..: 1 4 6 6 10 4 8 4 10 4 ...
 $ relationship  : Factor w/ 6 levels "Husband","Not-in-family",..: 2 1 2 1 6 6 2 1 2 1 ...
 $ race          : Factor w/ 5 levels "Amer-Indian-Eskimo",..: 5 5 5 3 3 5 3 5 5 5 ...
 $ sex           : Factor w/ 2 levels "Female","Male": 2 2 2 2 1 1 1 2 1 2 ...
 $ capital.gain  : int  2174 0 0 0 0 0 0 0 14084 5178 ...
 $ capital.loss  : int  0 0 0 0 0 0 0 0 0 0 ...
 $ hours.per.week: int  40 13 40 40 40 40 16 45 50 40 ...
 $ native.country: Factor w/ 41 levels "Cambodia","Canada",..: 39 39 39 39 5 39 23 39 39 39 ...
 $ X             : Factor w/ 2 levels "<=50K",">50K": 1 1 1 1 1 1 1 2 2 2 ...
 - attr(*, "na.action")= 'omit' Named int  15 28 39 52 62 70 78 94 107 129 ...
  ..- attr(*, "names")= chr  "15" "28" "39" "52" ...

Convert the factor columns to character. Because the dataset was imported with its text columns stored as factors, we need to change those columns to character so that we can manipulate the data.

# Convert every factor column of censusData to character in one pass; all
# other columns are passed through untouched. Assigning into censusData[]
# keeps the object a data frame with its existing attributes.
censusData[] <- lapply(
  censusData,
  function(column) if (is.factor(column)) as.character(column) else column
)

#Now, let’s look at the new structure

# Re-inspect the structure to confirm the former factor columns are now chr.
str(censusData)
'data.frame':   30162 obs. of  15 variables:
 $ age           : int  39 50 38 53 28 37 49 52 31 42 ...
 $ workclass     : chr  "State-gov" "Self-emp-not-inc" "Private" "Private" ...
 $ fnlwgt        : int  77516 83311 215646 234721 338409 284582 160187 209642 45781 159449 ...
 $ education     : chr  "Bachelors" "Bachelors" "HS-grad" "11th" ...
 $ education.num : int  13 13 9 7 13 14 5 9 14 13 ...
 $ marital.status: chr  "Never-married" "Married-civ-spouse" "Divorced" "Married-civ-spouse" ...
 $ occupation    : chr  "Adm-clerical" "Exec-managerial" "Handlers-cleaners" "Handlers-cleaners" ...
 $ relationship  : chr  "Not-in-family" "Husband" "Not-in-family" "Husband" ...
 $ race          : chr  "White" "White" "White" "Black" ...
 $ sex           : chr  "Male" "Male" "Male" "Male" ...
 $ capital.gain  : int  2174 0 0 0 0 0 0 0 14084 5178 ...
 $ capital.loss  : int  0 0 0 0 0 0 0 0 0 0 ...
 $ hours.per.week: int  40 13 40 40 40 40 16 45 50 40 ...
 $ native.country: chr  "United-States" "United-States" "United-States" "United-States" ...
 $ X             : chr  "<=50K" "<=50K" "<=50K" "<=50K" ...
 - attr(*, "na.action")= 'omit' Named int  15 28 39 52 62 70 78 94 107 129 ...
  ..- attr(*, "names")= chr  "15" "28" "39" "52" ...

##Let’s look at the missing values in the columns

# Tabulate NA versus non-NA cells over the whole data frame.
# NOTE(review): this runs before the " ?" placeholders are recoded to NA in
# the next statement, so those placeholders are not counted here.
table(is.na(censusData))

 FALSE 
452430 
# Recode the placeholder string " ?" (note the leading space) to NA in every
# cell of the data frame where it occurs.
censusData[censusData==" ?"]<- NA

##Remove all the rows that contain NA values.

# Drop every row that contains at least one NA.
censusData <-na.omit(censusData)

##Remove all whitespaces from the columns.

# Install stringr only when it is missing. Calling install.packages() on a
# package that is already loaded fails with "Updating loaded packages" and
# otherwise re-downloads the package on every run.
if (!requireNamespace("stringr", quietly = TRUE)) {
  install.packages("stringr")
}
Error in install.packages : Updating loaded packages
# Install dplyr only when it is missing. Calling install.packages() on a
# package that is already loaded fails with "Updating loaded packages" and
# otherwise re-downloads the package on every run.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
Error in install.packages : Updating loaded packages
library(stringr) 
package 㤼㸱stringr㤼㸲 was built under R version 3.6.3
library(dplyr)
package 㤼㸱dplyr㤼㸲 was built under R version 3.6.3
Attaching package: 㤼㸱dplyr㤼㸲

The following objects are masked from 㤼㸱package:stats㤼㸲:

    filter, lag

The following objects are masked from 㤼㸱package:base㤼㸲:

    intersect, setdiff, setequal, union
# Strip leading and trailing whitespace from every character column.
censusData <- censusData %>%
  mutate_if(is.character, str_trim)

##In order to model and visualize the data, we need to convert the character columns back into factors

# Turn every character column back into a factor, leaving the remaining
# columns untouched, then print the resulting structure.
censusData[] <- lapply(
  censusData,
  function(column) if (is.character(column)) as.factor(column) else column
)
str(censusData)
'data.frame':   30162 obs. of  15 variables:
 $ age           : int  39 50 38 53 28 37 49 52 31 42 ...
 $ workclass     : Factor w/ 7 levels "Federal-gov",..: 6 5 3 3 3 3 3 5 3 3 ...
 $ fnlwgt        : int  77516 83311 215646 234721 338409 284582 160187 209642 45781 159449 ...
 $ education     : Factor w/ 16 levels "10th","11th",..: 10 10 12 2 10 13 7 12 13 10 ...
 $ education.num : int  13 13 9 7 13 14 5 9 14 13 ...
 $ marital.status: Factor w/ 7 levels "Divorced","Married-AF-spouse",..: 5 3 1 3 3 3 4 3 5 3 ...
 $ occupation    : Factor w/ 14 levels "Adm-clerical",..: 1 4 6 6 10 4 8 4 10 4 ...
 $ relationship  : Factor w/ 6 levels "Husband","Not-in-family",..: 2 1 2 1 6 6 2 1 2 1 ...
 $ race          : Factor w/ 5 levels "Amer-Indian-Eskimo",..: 5 5 5 3 3 5 3 5 5 5 ...
 $ sex           : Factor w/ 2 levels "Female","Male": 2 2 2 2 1 1 1 2 1 2 ...
 $ capital.gain  : int  2174 0 0 0 0 0 0 0 14084 5178 ...
 $ capital.loss  : int  0 0 0 0 0 0 0 0 0 0 ...
 $ hours.per.week: int  40 13 40 40 40 40 16 45 50 40 ...
 $ native.country: Factor w/ 41 levels "Cambodia","Canada",..: 39 39 39 39 5 39 23 39 39 39 ...
 $ X             : Factor w/ 2 levels "<=50K",">50K": 1 1 1 1 1 1 1 2 2 2 ...
 - attr(*, "na.action")= 'omit' Named int  15 28 39 52 62 70 78 94 107 129 ...
  ..- attr(*, "names")= chr  "15" "28" "39" "52" ...

#let’s now plot

# X is a two-level factor (<=50K / >50K); plot() on a factor draws a bar
# chart of the level counts.
plot(censusData$X)

##Data Manipulation: In this phase I will perform data manipulation to analyze the data set using various functions from the dplyr package

# Per-column summary: quantiles and mean for numeric columns, level counts
# for factor columns.
summary(censusData)
      age                   workclass         fnlwgt               education    education.num                 marital.status 
 Min.   :17.00   Federal-gov     :  943   Min.   :  13769   HS-grad     :9840   Min.   : 1.00   Divorced             : 4214  
 1st Qu.:28.00   Local-gov       : 2067   1st Qu.: 117627   Some-college:6678   1st Qu.: 9.00   Married-AF-spouse    :   21  
 Median :37.00   Private         :22286   Median : 178425   Bachelors   :5044   Median :10.00   Married-civ-spouse   :14065  
 Mean   :38.44   Self-emp-inc    : 1074   Mean   : 189794   Masters     :1627   Mean   :10.12   Married-spouse-absent:  370  
 3rd Qu.:47.00   Self-emp-not-inc: 2499   3rd Qu.: 237629   Assoc-voc   :1307   3rd Qu.:13.00   Never-married        : 9726  
 Max.   :90.00   State-gov       : 1279   Max.   :1484705   11th        :1048   Max.   :16.00   Separated            :  939  
                 Without-pay     :   14                     (Other)     :4618                   Widowed              :  827  
           occupation           relationship                   race           sex         capital.gain    capital.loss    
 Prof-specialty :4038   Husband       :12463   Amer-Indian-Eskimo:  286   Female: 9782   Min.   :    0   Min.   :   0.00  
 Craft-repair   :4030   Not-in-family : 7726   Asian-Pac-Islander:  895   Male  :20380   1st Qu.:    0   1st Qu.:   0.00  
 Exec-managerial:3992   Other-relative:  889   Black             : 2817                  Median :    0   Median :   0.00  
 Adm-clerical   :3721   Own-child     : 4466   Other             :  231                  Mean   : 1092   Mean   :  88.37  
 Sales          :3584   Unmarried     : 3212   White             :25933                  3rd Qu.:    0   3rd Qu.:   0.00  
 Other-service  :3212   Wife          : 1406                                             Max.   :99999   Max.   :4356.00  
 (Other)        :7585                                                                                                     
 hours.per.week        native.country      X        
 Min.   : 1.00   United-States:27504   <=50K:22654  
 1st Qu.:40.00   Mexico       :  610   >50K : 7508  
 Median :40.00   Philippines  :  188                
 Mean   :40.93   Germany      :  128                
 3rd Qu.:45.00   Puerto-Rico  :  109                
 Max.   :99.00   Canada       :  107                
                 (Other)      : 1516                

##Extract the “education” column and store it in “census_ed”

# Pull the education column out as a standalone vector, open it in the
# viewer and report its class.
census_ed <- censusData[["education"]]
View(census_ed)
class(census_ed)
[1] "factor"
# Show the first six values of the extracted column along with its levels.
head(census_ed)
[1] Bachelors Bachelors HS-grad   11th      Bachelors Masters  
16 Levels: 10th 11th 12th 1st-4th 5th-6th 7th-8th 9th Assoc-acdm Assoc-voc Bachelors Doctorate HS-grad Masters Preschool ... Some-college

##Extract all the columns from “age” to “relationship” and store it in “census_seq”.

# Install dplyr only when it is missing. Calling install.packages() on a
# package that is already loaded fails with "Updating loaded packages".
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
Error in install.packages : Updating loaded packages
library(dplyr)
# Keep the contiguous run of columns from age through relationship, then
# print the result.
census_seq <- censusData %>%
  select(age:relationship)
census_seq

##Extract the column number “5”, “8”, “11” and store it in “census_col”

# Select columns by position (5th, 8th and 11th), open the result in the
# viewer and preview its first rows.
column_positions <- c(5, 8, 11)
census_col <- censusData[, column_positions]
View(census_col)
head(census_col)

##Extract all the male employees who work in state-gov and store it in “male_gov”.

# Install dplyr only when it is missing, instead of downloading and
# reinstalling it (over a possibly loaded copy) on every run.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:

https://cran.rstudio.com/bin/windows/Rtools/
Installing package into 㤼㸱C:/Users/ADMIN/Documents/R/win-library/3.6㤼㸲
(as 㤼㸱lib㤼㸲 is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/dplyr_1.0.2.zip'
Content type 'application/zip' length 1527701 bytes (1.5 MB)
downloaded 1.5 MB
package ‘dplyr’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\ADMIN\AppData\Local\Temp\RtmpeEiq4m\downloaded_packages
library(dplyr)
package 㤼㸱dplyr㤼㸲 was built under R version 3.6.3
Attaching package: 㤼㸱dplyr㤼㸲

The following objects are masked from 㤼㸱package:stats㤼㸲:

    filter, lag

The following objects are masked from 㤼㸱package:base㤼㸲:

    intersect, setdiff, setequal, union
# Rows where sex is "Male" and workclass is "State-gov"; comma-separated
# conditions in filter() are combined with AND.
male_gov <- filter(censusData, sex == "Male", workclass == "State-gov")
View(male_gov)

##Extract all the 39 year olds who either have a bachelor’s degree or who are natives of the United States and store the result in “census_us”

# List the native.country levels with their row counts, to check the exact
# spelling used in the filter further down.
table(censusData$native.country)

                  Cambodia                     Canada                      China                   Columbia                       Cuba 
                        18                        107                         68                         56                         92 
        Dominican-Republic                    Ecuador                El-Salvador                    England                     France 
                        67                         27                        100                         86                         27 
                   Germany                     Greece                  Guatemala                      Haiti         Holand-Netherlands 
                       128                         29                         63                         42                          1 
                  Honduras                       Hong                    Hungary                      India                       Iran 
                        12                         19                         13                        100                         42 
                   Ireland                      Italy                    Jamaica                      Japan                       Laos 
                        24                         68                         80                         59                         17 
                    Mexico                  Nicaragua Outlying-US(Guam-USVI-etc)                       Peru                Philippines 
                       610                         33                         14                         30                        188 
                    Poland                   Portugal                Puerto-Rico                   Scotland                      South 
                        56                         34                        109                         11                         71 
                    Taiwan                   Thailand            Trinadad&Tobago              United-States                    Vietnam 
                        42                         17                         18                      27504                         64 
                Yugoslavia 
                        16 
# List the education levels with their row counts, to check the exact
# spelling used in the filter further down.
table(censusData$education)

        10th         11th         12th      1st-4th      5th-6th      7th-8th          9th   Assoc-acdm    Assoc-voc    Bachelors 
         820         1048          377          151          288          557          455         1008         1307         5044 
   Doctorate      HS-grad      Masters    Preschool  Prof-school Some-college 
         375         9840         1627           45          542         6678 
# 39-year-olds who hold a Bachelors degree or whose native country is the
# United States (or both).
census_us <- filter(
  censusData,
  age == 39,
  education == "Bachelors" | native.country == "United-States"
)
View(census_us)

##Extract 200 random rows from the “census” data frame and store it in “census_200”.

# Draw 200 rows at random; no seed is set here, so the sample differs from
# run to run.
census_200 <- censusData %>%
  sample_n(200)
View(census_200)

##Get the count of different levels of the “workclass” column.

# Install plyr only when it is missing, instead of downloading and
# reinstalling it on every run.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:

https://cran.rstudio.com/bin/windows/Rtools/
Installing package into 㤼㸱C:/Users/ADMIN/Documents/R/win-library/3.6㤼㸲
(as 㤼㸱lib㤼㸲 is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/plyr_1.8.6.zip'
Content type 'application/zip' length 1314846 bytes (1.3 MB)
downloaded 1.3 MB
package ‘plyr’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\ADMIN\AppData\Local\Temp\RtmpeEiq4m\downloaded_packages
library(plyr)
package 㤼㸱plyr㤼㸲 was built under R version 3.6.3---------------------------------------------------------------------------------------------------------------------------------------
You have loaded plyr after dplyr - this is likely to cause problems.
If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
library(plyr); library(dplyr)
---------------------------------------------------------------------------------------------------------------------------------------

Attaching package: 㤼㸱plyr㤼㸲

The following objects are masked from 㤼㸱package:dplyr㤼㸲:

    arrange, count, desc, failwith, id, mutate, rename, summarise, summarize
# Count the rows per workclass level. count() is called with an explicit
# plyr:: prefix because dplyr also exports a count() that expects a data
# frame rather than a vector; without the prefix the call resolves to
# whichever of the two packages was attached last.
countWcls <- plyr::count(censusData$workclass)
countWcls
# Base-R cross-check of the same frequencies.
table(censusData$workclass)

     Federal-gov        Local-gov          Private     Self-emp-inc Self-emp-not-inc        State-gov      Without-pay 
             943             2067            22286             1074             2499             1279               14 

##Calculate the mean of “capital.gain” column grouped according to “workclass”.

# Mean capital.gain within each workclass level.
with(censusData, tapply(capital.gain, workclass, mean))
     Federal-gov        Local-gov          Private     Self-emp-inc Self-emp-not-inc        State-gov      Without-pay 
        832.3213         829.2303         879.8582        4810.7467        1913.1345         684.3065         487.8571 

#Data Visualization:

# Install ggplot2 only when it is missing, instead of downloading and
# reinstalling it (over a possibly loaded copy) on every run.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:

https://cran.rstudio.com/bin/windows/Rtools/
Installing package into 㤼㸱C:/Users/ADMIN/Documents/R/win-library/3.6㤼㸲
(as 㤼㸱lib㤼㸲 is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/ggplot2_3.3.2.zip'
Content type 'application/zip' length 4068917 bytes (3.9 MB)
downloaded 3.9 MB
package ‘ggplot2’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\ADMIN\AppData\Local\Temp\RtmpeEiq4m\downloaded_packages
library(ggplot2)
package 㤼㸱ggplot2㤼㸲 was built under R version 3.6.3

##Build a bar-plot for the “relationship” column and fill the bars according to the “race” column.

# Stacked bar chart: one bar per relationship level, segments filled by race.
ggplot(data = censusData, mapping = aes(x = relationship, fill = race)) +
  geom_bar()

##Set x-axis label to ‘Categories of Relationships’ ##Set y-axis label to ‘Count of Categories’

# Same stacked bar chart with explicit axis labels.
ggplot(data = censusData, mapping = aes(x = relationship, fill = race)) +
  geom_bar() +
  xlab("Categories of Relationships") +
  ylab("Count of Categories")

##Fill the bars according to “sex”

# Stacked bar chart of relationship counts, segments filled by sex.
ggplot(data = censusData, mapping = aes(x = relationship, fill = sex)) +
  geom_bar() +
  xlab("Categories of Relationships") +
  ylab("Count of Categories")

##Set the position of the bars to “dodge”

# Relationship counts by sex with the bars placed side by side ("dodge")
# instead of stacked.
ggplot(data = censusData, mapping = aes(x = relationship, fill = sex)) +
  geom_bar(position = "dodge") +
  xlab("Categories of Relationships") +
  ylab("Count of Categories")

##Set the title of the plot to “Distribution of Relationships by Sex”

# Side-by-side relationship counts by sex, with axis labels and a title.
ggplot(data = censusData, mapping = aes(x = relationship, fill = sex)) +
  geom_bar(position = "dodge") +
  xlab("Categories of Relationships") +
  ylab("Count of Categories") +
  ggtitle("Distribution of Relationships by Sex")

##Build a Histogram for the “age” column with number of bins equal to 50.

# Histogram of age using 50 bins.
ggplot(data = censusData, mapping = aes(x = age)) +
  geom_histogram(bins = 50)

# Row count for every distinct age value.
table(censusData$age)

 17  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50 
328 447 594 629 621 674 824 752 799 745 789 808 774 813 851 789 837 836 828 852 828 791 786 765 769 741 743 704 706 711 683 523 555 575 
 51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84 
571 455 448 394 386 343 337 344 332 276 259 213 186 173 136 110 111  90  80  64  54  40  49  38  34  29  20  14  15  16  13   7   5   8 
 85  86  88  90 
  3   1   3  35 

##Fill the bars of the histogram according to yearly income column i.e., “X”

# Age histogram (90 bins) with the bars filled by income class X.
ggplot(data = censusData, mapping = aes(x = age, fill = X)) +
  geom_histogram(bins = 90)

##Set the title of the plot to “Distribution of Age”.

# Age histogram filled by income class X, with a plot title.
ggplot(data = censusData, mapping = aes(x = age, fill = X)) +
  geom_histogram(bins = 90) +
  ggtitle("Distribution of Age")

##Set the legend title to “Yearly income”.

# Age histogram filled by income class X; labs(fill = ...) sets the title of
# the fill legend.
ggplot(data = censusData, mapping = aes(x = age, fill = X)) +
  geom_histogram(bins = 90) +
  labs(
    title = "Distribution of Age",
    fill = "Yearly income"
  )

##Set the theme of the plot to black and white.

# Age histogram with the black-and-white theme. theme_bw() is ggplot2's
# complete black-and-white theme; a bare theme() call overrides nothing and
# leaves the default grey theme in place. The fill by income class X and the
# "Yearly income" legend title from the preceding histogram steps are kept,
# since each step in this sequence builds on the previous plot.
ggplot(censusData, aes(x = age, fill = X)) +
  geom_histogram(bins = 90) +
  labs(title = "Distribution of Age", fill = "Yearly income") +
  theme_bw()

##Build a scatter-plot between “capital.gain” and “hours.per.week”. ## Map “capital.gain” on the x- axis and “hours.per.week” on the y-axis.

# Scatter plot: capital.gain on the x-axis, hours.per.week on the y-axis.
ggplot(
  data = censusData,
  mapping = aes(x = capital.gain, y = hours.per.week)
) +
  geom_point()

##Set the transparency of the points to 40% and size as 2.

# Same scatter plot with point size 2 and alpha 0.6 (60% opaque, i.e. 40%
# transparent).
ggplot(
  data = censusData,
  mapping = aes(x = capital.gain, y = hours.per.week)
) +
  geom_point(size = 2, alpha = 0.6)

##Set the color of the points according to the “X” (yearly income) column.

# Scatter plot with the points coloured by income class X. The mapping uses
# `colour`, not `fill`: geom_point()'s default point shape has no fill, so a
# fill mapping leaves every point black.
ggplot(censusData, aes(x = capital.gain, y = hours.per.week, colour = X)) +
  geom_point()

##Set the x-axis label to “Capital Gain”, y-axis label to “Hours per Week”, title to “Capital Gain vs Hours per Week by Income”, and legend label to “Yearly Income”.

# Labelled scatter plot with the points coloured by income class X. The
# mapping and the legend title use `colour`, not `fill`: geom_point()'s
# default point shape has no fill, so a fill mapping leaves every point
# black and only adds a legend that matches nothing in the plot.
ggplot(censusData, aes(x = capital.gain, y = hours.per.week, colour = X)) +
  geom_point(alpha = 0.6, size = 2) +
  labs(
    x = "Capital Gain",
    y = "Hours per Week",
    title = "Capital Gain vs Hours per Week by Income",
    colour = "Yearly Income"
  )

# Install plotly only when it is missing, instead of downloading and
# reinstalling it (over a possibly loaded copy) on every run.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:

https://cran.rstudio.com/bin/windows/Rtools/
Installing package into 㤼㸱C:/Users/ADMIN/Documents/R/win-library/3.6㤼㸲
(as 㤼㸱lib㤼㸲 is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/plotly_4.9.2.1.zip'
Content type 'application/zip' length 3044949 bytes (2.9 MB)
downloaded 2.9 MB
package ‘plotly’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\ADMIN\AppData\Local\Temp\RtmpeEiq4m\downloaded_packages
library(plotly)
package 㤼㸱plotly㤼㸲 was built under R version 3.6.3
Attaching package: 㤼㸱plotly㤼㸲

The following object is masked from 㤼㸱package:ggplot2㤼㸲:

    last_plot

The following objects are masked from 㤼㸱package:plyr㤼㸲:

    arrange, mutate, rename, summarise

The following object is masked from 㤼㸱package:stats㤼㸲:

    filter

The following object is masked from 㤼㸱package:graphics㤼㸲:

    layout
# Interactive scatter plot of hours.per.week against capital.gain, coloured
# by income class X. mode = "markers" is stated explicitly: when it is left
# out plotly falls back to markers anyway, but emits a "No scatter mode
# specifed" message on every render.
plot_ly(
  data = censusData,
  x = ~capital.gain,
  y = ~hours.per.week,
  color = ~X,
  type = "scatter",
  mode = "markers"
)
No scatter mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
`arrange_()` is deprecated as of dplyr 0.7.0.
Please use `arrange()` instead.
See vignette('programming') for more help
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.minimal value for n is 3, returning requested palette with 3 different levels
minimal value for n is 3, returning requested palette with 3 different levels
No scatter mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
minimal value for n is 3, returning requested palette with 3 different levels
minimal value for n is 3, returning requested palette with 3 different levels

##Build a box-plot between the “education” and “age” columns. Map “education” on the x-axis and “age” on the y-axis.

# Box plot of age for each education level.
ggplot(data = censusData, mapping = aes(x = education, y = age)) +
  geom_boxplot()

##Fill the box-plots according to the “sex” column.

# Box plot of age for each education level, split and filled by sex.
ggplot(
  data = censusData,
  mapping = aes(x = education, y = age, fill = sex)
) +
  geom_boxplot()

##Set the title to “Box-Plot of age by Education and Sex”.

# Box plot of age by education level and sex, with a plot title.
ggplot(
  data = censusData,
  mapping = aes(x = education, y = age, fill = sex)
) +
  geom_boxplot() +
  ggtitle("Box-Plot of age by Education and Sex")

#Prediction: Building a Linear Regression Model
##Build a simple linear regression model
##Divide the dataset into training and test sets in a 70:30 ratio.

 # Fix the random-number seed before the random train/test split.
 set.seed(98)
 # Install caTools only when it is missing, instead of downloading and
 # reinstalling it (over a possibly loaded copy) on every run.
 if (!requireNamespace("caTools", quietly = TRUE)) {
   install.packages("caTools")
 }
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:

https://cran.rstudio.com/bin/windows/Rtools/
Installing package into 㤼㸱C:/Users/ADMIN/Documents/R/win-library/3.6㤼㸲
(as 㤼㸱lib㤼㸲 is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/caTools_1.18.0.zip'
Content type 'application/zip' length 330351 bytes (322 KB)
downloaded 322 KB
package ‘caTools’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\ADMIN\AppData\Local\Temp\RtmpY9qMhV\downloaded_packages
 library("caTools")
package 㤼㸱caTools㤼㸲 was built under R version 3.6.3
 # sample.split() returns one logical flag per row, TRUE for roughly 70% of
 # them; the vector is passed with hours.per.week, the response modelled
 # later.
 split_data <- sample.split(censusData$hours.per.week, SplitRatio = 0.70)
 View(split_data)
 # Use the logical vector itself rather than comparing against T / F: T and F
 # are ordinary variables that can be reassigned, which would silently break
 # the split.
 censusTrain <- subset(censusData, split_data)
 censusTest <- subset(censusData, !split_data)
 View(censusTrain)
 View(censusTest)
 nrow(censusTrain)
[1] 21113
 # Number of rows held out for testing.
 nrow(censusTest)
[1] 9049
# Open the logical split vector in the viewer (already viewed once above).
View(split_data)

##Build a linear model on the train set where the dependent variable is “hours.per.week” and the independent variable is “education.num” (formula form: dependent ~ independent).

# Look at the response and predictor columns side by side.
View(censusData[c("hours.per.week", "education.num")])

# Simple linear regression of hours.per.week on education.num, fitted on the
# training rows only; summary() prints coefficients and fit statistics.
LR_model <- lm(formula = hours.per.week ~ education.num, data = censusTrain)
summary(LR_model)

Call:
lm(formula = hours.per.week ~ education.num, data = censusTrain)

Residuals:
    Min      1Q  Median      3Q     Max 
-44.064  -2.954  -0.140   4.157  62.378 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)   33.80781    0.33445  101.08   <2e-16 ***
education.num  0.70353    0.03204   21.96   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 11.85 on 21111 degrees of freedom
Multiple R-squared:  0.02233,   Adjusted R-squared:  0.02229 
F-statistic: 482.3 on 1 and 21111 DF,  p-value: < 2.2e-16

Predict the values on the test set and find the error in prediction.

##Find the root-mean-square error (RMSE).

 # Predicted hours.per.week for the held-out rows, then a peek at the first
 # few predictions.
 censusP <- predict(object = LR_model, newdata = censusTest)
 head(censusP)
       9       15       16       20       22       24 
43.65722 36.62193 40.13957 45.06428 37.32546 40.13957 
 # Put observed and predicted hours side by side; cbind() of two vectors
 # gives a matrix, which class() reports.
 View(censusP)
 censusD <- cbind(
   Actual = censusTest$hours.per.week,
   Predicted = censusP
 )
 View(censusD)
 class(censusD)
[1] "matrix"
 # Convert to a data frame, compute the per-row prediction error, attach it
 # as a third column, and take the root of the mean squared error.
 censusD <- as.data.frame(censusD)
 Error <- with(censusD, Actual - Predicted)
 View(Error)
 Data <- cbind(censusD, Error)
 View(Data)
 sqrt(mean(Data$Error^2))
[1] 11.82446
library(caret)
package 㤼㸱caret㤼㸲 was built under R version 3.6.3Loading required package: lattice
Loading required package: ggplot2
package 㤼㸱ggplot2㤼㸲 was built under R version 3.6.3
# Cross-check the hand-computed RMSE with RMSE() from the caret package
# loaded just above, given the predictions and the observed test values.
RMSE(censusP, censusTest$hours.per.week)
[1] 11.82446

#Prediction building a Logistic Regression

Let’s divide the dataset into training and test sets in a 65:35 ratio.

# Install caTools only when it is missing. Calling install.packages() on a
# package that is already loaded fails with "Updating loaded packages".
if (!requireNamespace("caTools", quietly = TRUE)) {
  install.packages("caTools")
}
Error in install.packages : Updating loaded packages
 library("caTools")
 # sample.split() returns one logical flag per row, TRUE for roughly 65% of
 # them; the vector is passed with the outcome X.
 split_data1 <- sample.split(censusData$X, SplitRatio = 0.65)
 # Use the logical vector itself rather than comparing against T / F: T and F
 # are ordinary variables that can be reassigned, which would silently break
 # the split.
 censusTrain1 <- subset(censusData, split_data1)
 censusTest1 <- subset(censusData, !split_data1)
 nrow(censusTrain1)
[1] 19605
 # Number of rows held out for testing the logistic model.
 nrow(censusTest1)
[1] 10557

Now let’s build a logistic regression model where the dependent variable is “X” (yearly income) and the independent variable is “occupation”.

# Logistic regression (binomial family) of income class X on occupation,
# fitted on the training rows only; summary() prints the coefficient table.
log_mod <- glm(
  formula = X ~ occupation,
  family = "binomial",
  data = censusTrain1
)
summary(log_mod)

Call:
glm(formula = X ~ occupation, family = "binomial", data = censusTrain1)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.1653  -0.7900  -0.5181  -0.1358   3.0631  

Coefficients:
                             Estimate Std. Error z value Pr(>|z|)    
(Intercept)                  -1.94027    0.06123 -31.686  < 2e-16 ***
occupationArmed-Forces      -10.62579  132.57607  -0.080  0.93612    
occupationCraft-repair        0.70014    0.07699   9.094  < 2e-16 ***
occupationExec-managerial     1.91171    0.07276  26.275  < 2e-16 ***
occupationFarming-fishing    -0.03095    0.13548  -0.228  0.81928    
occupationHandlers-cleaners  -0.60676    0.14262  -4.255 2.09e-05 ***
occupationMachine-op-inspct  -0.07784    0.10753  -0.724  0.46912    
occupationOther-service      -1.18778    0.12494  -9.506  < 2e-16 ***
occupationPriv-house-serv    -2.74186    1.00648  -2.724  0.00645 ** 
occupationProf-specialty      1.72773    0.07289  23.704  < 2e-16 ***
occupationProtective-serv     1.21252    0.11914  10.178  < 2e-16 ***
occupationSales               0.93585    0.07709  12.140  < 2e-16 ***
occupationTech-support        1.21450    0.10640  11.415  < 2e-16 ***
occupationTransport-moving    0.59881    0.09863   6.071 1.27e-09 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 22002  on 19604  degrees of freedom
Residual deviance: 19420  on 19591  degrees of freedom
AIC: 19448

Number of Fisher Scoring iterations: 11

Predict the values on the test set. To assess accuracy we need the ROCR package. plot(acc) shows accuracy against the cut-off; check at which cut-off value the accuracy levels off.

# type = "response" returns predicted probabilities rather than values on
# the link scale; head() previews the first few.
pred_val <- predict(
  object = log_mod,
  newdata = censusTest1,
  type = "response"
)
head(pred_val)
        2         5         8         9        12        15 
0.4928599 0.4470634 0.4928599 0.4470634 0.4470634 0.2072692 
# Smallest and largest predicted probability, which bound the usable
# cut-off values.
range(pred_val)
[1] 3.488403e-06 4.928599e-01
# Install ROCR only when it is missing. Calling install.packages() on a
# package that is already loaded fails with "Updating loaded packages".
if (!requireNamespace("ROCR", quietly = TRUE)) {
  install.packages("ROCR")
}
Error in install.packages : Updating loaded packages
library(ROCR)
# Pair the predicted probabilities with the true income labels in a ROCR
# prediction object, then print it.
predict_log_roc <- prediction(
  predictions = pred_val,
  labels = censusTest1$X
)
predict_log_roc
A prediction instance
  with 10557 data points
# Accuracy ("acc") as a ROCR performance object, then plot it.
acc <- performance(predict_log_roc, measure = "acc")
plot(acc)

# Class balance of the outcome across the full data set.
table(censusData$X)

<=50K  >50K 
22654  7508 

Plot accuracy vs cut-off and pick an ideal value for cut-off.

# Label a case ">50K" when its predicted probability exceeds the 0.47
# cut-off, and "<=50K" otherwise.
lm.pred <- ifelse(
  test = pred_val > 0.47,
  yes = ">50K",
  no = "<=50K"
)
Warning message:
package ‘ROCR’ was built under R version 3.6.3 
lm.pred
      2       5       8       9      12      15      18      20      21      22      23      24      25      26      27      29      30 
 ">50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" 
     32      35      37      46      47      51      52      54      55      56      61      62      63      64      67      69      71 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" 
     76      77      79      83      88      89      90      92      93      95      99     101     103     106     108     109     111 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
    119     121     122     124     125     126     127     131     133     141     145     146     147     152     153     154     157 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" 
    159     160     168     169     170     174     178     184     185     186     188     193     194     200     202     205     207 
"<=50K" "<=50K"  ">50K" "<=50K" "<=50K"  ">50K" "<=50K"  ">50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
    211     214     216     219     220     224     229     233     237     242     243     251     252     253     254     262     270 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
    273     274     275     276     282     283     288     289     293     300     301     303     305     306     315     320     322 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
    323     324     327     329     331     340     341     344     345     346     348     349     353     354     355     360     362 
"<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
    366     376     378     380     381     387     390     391     395     400     404     405     406     407     416     417     418 
"<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
    423     424     425     427     431     436     437     438     440     461     462     464     470     474     478     479     482 
 ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K"  ">50K" "<=50K" 
    485     492     494     495     500     504     507     512     522     523     527     530     534     536     539     541     542 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
    544     545     549     552     553     556     559     560     561     562     570     577     578     579     584     586     593 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" 
    595     596     597     598     599     600     605     606     608     611     615     616     617     621     622     623     627 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
    631     637     639     641     642     646     648     649     650     652     654     662     663     664     667     673     674 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K"  ">50K" "<=50K" "<=50K" 
    678     680     683     684     685     689     690     693     694     698     701     704     708     709     710     714     715 
"<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
    721     724     731     735     737     741     742     743     745     747     749     754     755     757     759     760     762 
 ">50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
    764     769     772     774     784     786     787     791     795     799     808     809     812     814     821     822     824 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
    827     828     829     830     836     844     850     852     854     855     857     865     866     867     868     869     870 
 ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" 
    874     876     880     881     882     883     885     887     888     893     895     898     906     908     909     913     914 
"<=50K" "<=50K"  ">50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K"  ">50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
    915     916     919     921     922     924     926     929     943     944     945     948     951     953     954     959     960 
"<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" 
    962     963     967     968     970     976     977     979     980     981     983     985     986     987     990     992     994 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
    995    1002    1003    1004    1011    1012    1019    1021    1024    1026    1027    1030    1034    1035    1036    1039    1041 
"<=50K" "<=50K"  ">50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" 
   1045    1048    1049    1050    1051    1053    1055    1060    1061    1066    1070    1071    1072    1073    1076    1078    1079 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   1084    1085    1093    1096    1103    1116    1119    1123    1125    1126    1128    1131    1133    1134    1137    1139    1140 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   1142    1143    1145    1148    1150    1154    1155    1165    1167    1171    1173    1177    1180    1181    1183    1184    1190 
 ">50K"  ">50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   1193    1198    1199    1200    1201    1203    1208    1212    1216    1221    1225    1231    1232    1234    1243    1248    1249 
"<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   1250    1252    1255    1258    1260    1262    1264    1267    1271    1277    1279    1281    1284    1285    1288    1290    1294 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   1295    1303    1305    1317    1318    1323    1324    1325    1326    1329    1330    1331    1332    1333    1335    1337    1340 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K"  ">50K"  ">50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" 
   1341    1342    1343    1354    1357    1360    1364    1365    1366    1367    1373    1376    1377    1379    1382    1388    1392 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   1393    1394    1396    1400    1402    1406    1412    1415    1416    1419    1421    1424    1425    1427    1434    1435    1439 
 ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   1440    1442    1443    1448    1449    1452    1453    1455    1458    1468    1471    1473    1474    1481    1483    1484    1485 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" 
   1492    1494    1495    1501    1505    1506    1507    1511    1517    1519    1520    1524    1525    1526    1528    1536    1538 
"<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" 
   1540    1541    1542    1546    1547    1548    1549    1551    1563    1566    1567    1571    1572    1573    1574    1576    1578 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   1579    1581    1582    1584    1585    1586    1587    1591    1595    1597    1598    1599    1603    1604    1609    1612    1614 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K"  ">50K" 
   1615    1623    1624    1625    1627    1631    1632    1639    1640    1641    1642    1646    1649    1650    1651    1657    1660 
 ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   1662    1667    1668    1669    1671    1673    1677    1678    1679    1681    1685    1688    1689    1692    1693    1697    1702 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   1705    1708    1709    1714    1717    1718    1719    1720    1721    1723    1733    1734    1735    1739    1741    1742    1744 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" 
   1745    1748    1749    1751    1753    1755    1757    1758    1761    1764    1765    1766    1781    1785    1786    1792    1793 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   1797    1799    1802    1803    1806    1814    1817    1818    1820    1822    1823    1825    1832    1835    1838    1839    1840 
 ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   1845    1849    1853    1854    1857    1859    1861    1862    1864    1869    1874    1877    1883    1890    1891    1892    1898 
"<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   1899    1905    1907    1908    1912    1915    1916    1917    1921    1924    1928    1932    1933    1934    1941    1946    1948 
"<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   1949    1952    1954    1955    1958    1960    1963    1970    1971    1976    1977    1980    1982    1983    1987    1989    1992 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K"  ">50K" 
   2002    2003    2006    2008    2013    2017    2018    2021    2022    2034    2035    2036    2039    2046    2048    2050    2055 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   2061    2065    2069    2073    2078    2080    2081    2083    2084    2090    2091    2094    2100    2102    2103    2104    2105 
"<=50K"  ">50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   2107    2114    2116    2120    2121    2131    2132    2135    2138    2140    2141    2144    2145    2148    2149    2150    2152 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   2154    2155    2156    2157    2160    2161    2163    2165    2166    2170    2171    2176    2177    2182    2189    2191    2192 
"<=50K" "<=50K"  ">50K" "<=50K"  ">50K" "<=50K" "<=50K"  ">50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K"  ">50K" 
   2197    2199    2200    2201    2203    2205    2207    2209    2212    2213    2215    2217    2218    2219    2224    2226    2229 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" 
   2230    2232    2234    2235    2238    2240    2243    2255    2257    2258    2263    2264    2268    2269    2272    2274    2275 
"<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   2283    2286    2291    2293    2295    2296    2298    2301    2302    2304    2308    2311    2312    2319    2320    2324    2327 
"<=50K"  ">50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" 
   2329    2331    2333    2335    2343    2349    2351    2353    2354    2358    2359    2360    2362    2368    2373    2374    2375 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" 
   2381    2387    2388    2392    2393    2398    2399    2401    2406    2407    2415    2423    2424    2426    2427    2430    2433 
"<=50K"  ">50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K"  ">50K" 
   2435    2436    2437    2438    2441    2444    2445    2452    2453    2455    2456    2460    2461    2462    2463    2464    2468 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   2469    2472    2482    2483    2484    2485    2486    2488    2491    2492    2498    2499    2500    2502    2506    2508    2509 
 ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" 
   2515    2517    2519    2520    2521    2525    2527    2529    2530    2533    2536    2544    2548    2554    2559    2560    2563 
"<=50K" "<=50K" "<=50K"  ">50K" "<=50K"  ">50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   2565    2571    2577    2583    2586    2591    2593    2594    2601    2610    2615    2618    2621    2622    2624    2626    2628 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   2629    2630    2641    2644    2646    2650    2651    2652    2656    2658    2661    2665    2666    2667    2668    2669    2671 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K"  ">50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   2672    2678    2681    2682    2688    2689    2691    2692    2697    2700    2704    2705    2711    2714    2717    2719    2721 
"<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" 
   2723    2724    2726    2728    2730    2732    2733    2734    2735    2737    2738    2739    2740    2744    2746    2747    2752 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K"  ">50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" 
   2754    2759    2762    2763    2764    2767    2768    2769    2773    2775    2782    2787    2794    2795 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
 [ reached getOption("max.print") -- omitted 9557 entries ]

Build a confusion matrix and find the accuracy.

# Cross-tabulate predicted labels (rows) against observed labels (columns).
tab <- table(lm.pred, censusTest1$X)
tab
       
lm.pred <=50K >50K
  <=50K  7188 1968
  >50K    741  660

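# Confusion-matrix layout (rows = predicted, columns = actual, with <=50K treated as the positive class): TP FP / FN TN. TP and TN are correctly predicted; FP and FN are wrongly predicted.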

(7188+660)/(7188+660+1968+741)
[1] 0.743393
# Accuracy = correctly classified rows (the table's diagonal) over all rows.
accuracy <- sum(diag(tab)) / sum(tab)
accuracy
[1] 0.743393

Plot the ROC curve and find the auc(Area Under Curve).

# Install caTools only when it is not already available, so re-running the
# analysis does not download and reinstall the package every time.
if (!requireNamespace("caTools", quietly = TRUE)) {
  install.packages("caTools")
}
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:

https://cran.rstudio.com/bin/windows/Rtools/
Installing package into ‘C:/Users/ADMIN/Documents/R/win-library/3.6’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.6/caTools_1.18.0.zip'
Content type 'application/zip' length 330351 bytes (322 KB)
downloaded 322 KB
package ‘caTools’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\ADMIN\AppData\Local\Temp\Rtmpyaqneh\downloaded_packages
 library("caTools")
package ‘caTools’ was built under R version 3.6.3
# ROC curve: true-positive rate (y axis) against false-positive rate (x axis)
# over all cut-offs of the predicted probabilities.
roc <- performance(predict_log_roc, measure = "tpr", x.measure = "fpr")
plot(roc)

# Area under the ROC curve, returned as a ROCR performance object.
# Uses left assignment instead of the original right-assignment (`->`).
auc <- performance(predict_log_roc, measure = "auc")
auc
A performance instance
  'Area under the ROC curve'
# Replace the performance object with the bare number held in the first
# element of its y.values slot.
auc <- slot(auc, "y.values")[[1]]
auc
[1] 0.7224386
# Second train/test split with SplitRatio = 0.80: rows flagged TRUE form the
# training set, rows flagged FALSE form the test set.
# NOTE(review): no set.seed() call is visible before this line, so the split
# may differ between runs -- confirm whether a seed is set earlier.
split_data1 <- sample.split(censusData$X, SplitRatio = 0.80)
# Spell out TRUE/FALSE; the shorthands T and F are ordinary variables that
# can be reassigned.
censusTrain2 <- subset(censusData, split_data1 == TRUE)
censusTest2 <- subset(censusData, split_data1 == FALSE)

# Second logistic regression: income class X modelled on age, workclass and
# education, fitted on the 80% training split.
log_mod2 <- glm(
  formula = X ~ age + workclass + education,
  family = "binomial",
  data = censusTrain2
)
summary(log_mod2)

Call:
glm(formula = X ~ age + workclass + education, family = "binomial", 
    data = censusTrain2)

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-2.45996  -0.71258  -0.48153  -0.00092   2.89674  

Coefficients:
                            Estimate Std. Error z value Pr(>|z|)    
(Intercept)                -4.045540   0.185147 -21.850  < 2e-16 ***
age                         0.043331   0.001357  31.937  < 2e-16 ***
workclassLocal-gov         -0.535418   0.102298  -5.234 1.66e-07 ***
workclassPrivate           -0.350398   0.084826  -4.131 3.62e-05 ***
workclassSelf-emp-inc       0.691465   0.112371   6.153 7.58e-10 ***
workclassSelf-emp-not-inc  -0.433828   0.099532  -4.359 1.31e-05 ***
workclassState-gov         -0.645090   0.114088  -5.654 1.56e-08 ***
workclassWithout-pay      -13.855967 244.920373  -0.057   0.9549    
education11th              -0.020626   0.211948  -0.097   0.9225    
education12th               0.293045   0.269787   1.086   0.2774    
education1st-4th           -1.140963   0.534504  -2.135   0.0328 *  
education5th-6th           -0.533254   0.347338  -1.535   0.1247    
education7th-8th           -0.607732   0.252210  -2.410   0.0160 *  
education9th               -0.550901   0.288260  -1.911   0.0560 .  
educationAssoc-acdm         1.563876   0.173313   9.023  < 2e-16 ***
educationAssoc-voc          1.631505   0.167947   9.714  < 2e-16 ***
educationBachelors          2.344929   0.154836  15.145  < 2e-16 ***
educationDoctorate          3.579137   0.204748  17.481  < 2e-16 ***
educationHS-grad            0.971806   0.153985   6.311 2.77e-10 ***
educationMasters            2.804354   0.161939  17.317  < 2e-16 ***
educationPreschool        -12.098579 136.567513  -0.089   0.9294    
educationProf-school        3.642187   0.191060  19.063  < 2e-16 ***
educationSome-college       1.303126   0.155197   8.397  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 27079  on 24128  degrees of freedom
Residual deviance: 22478  on 24106  degrees of freedom
AIC: 22524

Number of Fisher Scoring iterations: 13
# Predicted probabilities from the second model on its held-out rows
# (overwrites the pred_val computed for the first model).
pred_val <- predict(
  log_mod2,
  newdata = censusTest2,
  type = "response"
)
head(pred_val)
        3         5         9        10        12        19 
0.1446068 0.3020126 0.4382387 0.4424821 0.2600398 0.5469194 

## Load ROCR to decide on the accuracy cut-off

library(ROCR)
# Rebuild the ROCR prediction object for the second model's probabilities
# and the matching test labels.
predict_log_roc <- prediction(
  predictions = pred_val,
  labels = censusTest2$X
)
predict_log_roc
A prediction instance
  with 6033 data points
# Accuracy against cut-off for the second model.
acc <- performance(predict_log_roc, measure = "acc")
plot(acc)

# Label each test row using the 0.45 cut-off, then print the labels.
lm.pred <- ifelse(
  pred_val > 0.45,
  ">50K",
  "<=50K"
)
lm.pred
      3       5       9      10      12      19      27      31      33      34      38      45      49      57      66      72      92 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K"  ">50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" 
    106     121     122     127     130     131     135     139     142     155     158     162     169     170     171     173     175 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K"  ">50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
    181     185     186     188     208     216     217     228     229     230     231     237     240     242     253     257     259 
 ">50K"  ">50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
    266     268     273     281     291     294     298     300     301     320     326     330     331     343     346     349     351 
 ">50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
    359     361     366     367     370     379     384     395     397     400     403     417     425     432     447     454     455 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K"  ">50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
    456     466     467     469     482     488     493     496     499     503     508     509     513     517     520     524     527 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K"  ">50K" "<=50K" "<=50K" 
    528     529     530     534     535     542     554     556     557     562     564     568     573     580     582     595     610 
"<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K"  ">50K" 
    614     623     631     633     634     636     637     640     647     649     655     668     672     676     685     691     694 
"<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
    698     699     709     712     715     718     722     739     742     747     751     753     756     765     770     777     779 
"<=50K" "<=50K"  ">50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
    783     785     788     792     793     796     797     798     807     808     810     811     813     814     818     821     848 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
    851     852     863     864     867     868     874     876     886     889     895     897     900     906     907     919     922 
"<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K"  ">50K" 
    927     932     934     938     940     942     943     944     952     953     955     957     964     969     984     989     990 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" 
    998    1001    1004    1009    1012    1016    1022    1029    1030    1032    1037    1043    1046    1062    1070    1074    1078 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   1083    1088    1092    1093    1101    1107    1111    1113    1117    1123    1126    1139    1142    1143    1146    1153    1167 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" 
   1191    1197    1200    1208    1218    1222    1226    1227    1228    1247    1250    1251    1260    1261    1270    1274    1275 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K"  ">50K" "<=50K" "<=50K" "<=50K" 
   1280    1283    1285    1288    1290    1299    1303    1304    1318    1319    1320    1321    1330    1336    1339    1340    1341 
"<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" 
   1346    1353    1362    1372    1381    1387    1394    1398    1400    1402    1405    1409    1419    1426    1429    1430    1439 
"<=50K"  ">50K"  ">50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   1443    1445    1456    1457    1467    1472    1475    1485    1493    1494    1496    1497    1504    1507    1515    1516    1520 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   1521    1533    1534    1540    1548    1556    1557    1561    1577    1582    1584    1585    1587    1604    1609    1620    1621 
 ">50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" 
   1623    1625    1634    1636    1637    1644    1645    1646    1648    1649    1656    1681    1682    1683    1685    1693    1696 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   1706    1713    1717    1720    1722    1723    1725    1733    1734    1740    1741    1744    1746    1752    1760    1770    1775 
"<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" 
   1778    1789    1791    1792    1795    1798    1801    1816    1824    1825    1830    1832    1838    1842    1844    1846    1848 
"<=50K" "<=50K"  ">50K"  ">50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   1854    1869    1873    1876    1883    1889    1892    1896    1899    1904    1905    1909    1912    1913    1914    1923    1924 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   1929    1930    1939    1947    1951    1952    1954    1956    1957    1960    1968    1970    1971    1979    1986    1988    1991 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   2000    2019    2039    2043    2070    2073    2076    2078    2085    2087    2092    2093    2098    2104    2115    2123    2127 
"<=50K" "<=50K"  ">50K" "<=50K"  ">50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K"  ">50K"  ">50K" 
   2130    2139    2143    2156    2157    2160    2170    2173    2175    2176    2179    2182    2185    2193    2194    2199    2219 
"<=50K" "<=50K" "<=50K"  ">50K" "<=50K"  ">50K" "<=50K"  ">50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" 
   2227    2229    2240    2251    2256    2259    2264    2268    2275    2277    2286    2290    2293    2309    2319    2320    2325 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   2329    2342    2355    2363    2364    2370    2374    2379    2380    2388    2399    2409    2416    2426    2429    2430    2434 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K"  ">50K" "<=50K" 
   2439    2442    2455    2456    2458    2463    2467    2476    2482    2487    2488    2491    2492    2495    2497    2499    2501 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   2513    2522    2524    2525    2541    2545    2554    2563    2566    2573    2577    2581    2584    2585    2587    2590    2592 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" 
   2604    2606    2608    2614    2618    2621    2634    2641    2642    2657    2661    2665    2666    2669    2672    2674    2675 
"<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   2682    2683    2685    2692    2696    2699    2700    2703    2711    2719    2729    2732    2734    2741    2749    2758    2763 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   2777    2783    2787    2791    2792    2797    2798    2801    2810    2815    2831    2834    2835    2841    2844    2846    2848 
 ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" 
   2876    2882    2884    2885    2890    2894    2898    2903    2904    2908    2917    2922    2924    2928    2929    2932    2934 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   2937    2960    2962    2965    2967    2970    2972    2976    2993    3006    3012    3017    3034    3036    3037    3041    3043 
"<=50K" "<=50K"  ">50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   3045    3052    3056    3059    3069    3073    3075    3083    3086    3087    3091    3098    3115    3120    3123    3126    3129 
"<=50K"  ">50K"  ">50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   3133    3140    3156    3158    3161    3174    3176    3179    3184    3191    3192    3199    3202    3210    3214    3217    3230 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" 
   3231    3236    3238    3245    3251    3254    3255    3256    3265    3275    3278    3280    3282    3300    3323    3333    3341 
 ">50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K"  ">50K" 
   3347    3353    3367    3369    3380    3384    3385    3393    3396    3397    3398    3399    3416    3420    3422    3424    3425 
"<=50K"  ">50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" 
   3429    3430    3434    3436    3439    3440    3443    3450    3455    3460    3468    3469    3470    3471    3477    3481    3482 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" 
   3483    3485    3487    3496    3499    3500    3501    3511    3516    3517    3521    3524    3528    3529    3534    3535    3546 
"<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   3561    3564    3572    3574    3577    3582    3588    3596    3612    3617    3623    3624    3634    3642    3643    3648    3651 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" 
   3654    3656    3660    3661    3667    3669    3684    3692    3695    3703    3710    3712    3713    3721    3726    3727    3733 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   3740    3741    3749    3756    3760    3761    3762    3765    3769    3773    3783    3785    3787    3795    3801    3802    3805 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   3807    3811    3815    3822    3827    3833    3836    3846    3867    3869    3872    3885    3895    3915    3927    3938    3944 
 ">50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K"  ">50K" "<=50K" 
   3945    3946    3950    3952    3954    3962    3963    3973    3985    3989    3990    3991    3994    4000    4004    4007    4010 
"<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" 
   4012    4018    4028    4032    4036    4047    4054    4058    4062    4063    4072    4080    4089    4095    4103    4116    4117 
"<=50K"  ">50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   4121    4123    4125    4129    4140    4142    4149    4157    4159    4161    4163    4167    4171    4173    4176    4179    4184 
 ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" 
   4190    4204    4215    4217    4219    4222    4223    4239    4249    4259    4260    4271    4272    4277    4280    4289    4296 
 ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   4312    4315    4320    4322    4324    4326    4329    4330    4337    4339    4341    4345    4347    4351    4353    4362    4363 
"<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K"  ">50K" "<=50K" "<=50K"  ">50K" 
   4368    4369    4370    4371    4374    4379    4382    4395    4402    4403    4420    4426    4428    4429    4430    4435    4446 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" 
   4449    4451    4455    4461    4463    4464    4471    4477    4478    4479    4483    4484    4487    4490    4493    4500    4502 
"<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" 
   4503    4504    4507    4511    4513    4514    4515    4516    4520    4521    4529    4530    4535    4555    4556    4559    4567 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K"  ">50K" "<=50K"  ">50K" "<=50K" 
   4570    4573    4579    4584    4586    4587    4589    4597    4605    4608    4611    4615    4638    4650    4654    4656    4657 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   4664    4677    4682    4690    4693    4704    4705    4709    4719    4720    4729    4732    4736    4754    4757    4760    4761 
"<=50K" "<=50K"  ">50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K"  ">50K"  ">50K" "<=50K" 
   4779    4781    4788    4793    4795    4806    4807    4808    4824    4832    4840    4841    4844    4846    4848    4856    4869 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   4873    4874    4877    4890    4892    4900    4921    4922    4924    4943    4945    4947    4950    4953    4954    4973    4974 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" 
   4975    4977    4986    4995    5005    5011    5013    5016    5018    5026    5028    5032    5035    5036    5037    5042    5046 
"<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" 
   5049    5052    5055    5063    5072    5080    5083    5086    5095    5096    5103    5106    5118    5125 
"<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K" "<=50K"  ">50K" 
 [ reached getOption("max.print") -- omitted 5033 entries ]
# Confusion matrix: predicted labels (rows) against the labels in censusTest2$X
# (columns).
tab <- table(lm.pred, censusTest2$X)
print(tab)
       
lm.pred <=50K >50K
  <=50K  4166  966
  >50K    365  536
# Accuracy: correctly classified cases (the diagonal) over all cases.
accuracy <- sum(diag(tab)) / sum(tab)
print(accuracy)
[1] 0.7793801
# ROC curve: true-positive rate ("tpr") plotted against false-positive rate
# ("fpr").
roc <- performance(predict_log_roc, measure = "tpr", x.measure = "fpr")
plot(roc)

# Area under the ROC curve, still wrapped in a performance object at this point.
auc <- performance(predict_log_roc, measure = "auc")
auc
A performance instance
  'Area under the ROC curve'
# Unwrap the AUC value from the first element of the y.values slot.
auc <- slot(auc, "y.values")[[1]]
print(auc)
[1] 0.7832074

#Prediction building a Decision Tree Model: ## Divide the dataset into training and test sets in 70:30 ratio.

# Fix the RNG seed so the random train/test split that follows is reproducible.
set.seed(123)
# Install caTools only when it is missing: calling install.packages() on a
# package that is already loaded aborts with "Updating loaded packages".
if (!requireNamespace("caTools", quietly = TRUE)) {
  install.packages("caTools")
}
library("caTools")
# Split on the outcome vector, not on the whole data frame. sample.split()
# returns one logical flag per element of its first argument; handed the data
# frame it produces one flag per *column* (15 flags), which subset() then
# recycles down the rows. The result is a fixed repeating row pattern (about
# 10 of every 15 rows in training) instead of a random 70:30 sample.
# NOTE: the row counts shown below in this rendered output predate this fix.
split_data <- sample.split(censusData$X, SplitRatio = 0.70)
# TRUE/FALSE rather than T/F, which are ordinary variables and can be reassigned.
censusTrain <- subset(censusData, split_data == TRUE)
censusTest <- subset(censusData, split_data == FALSE)
nrow(censusTrain)
[1] 20107
# Number of rows in the test subset.
nrow(censusTest)
[1] 10055
# Build a decision tree model where the dependent variable is "X"(Yearly Income) and the rest of the variables as independent variables
library(rpart)
library(rpart.plot) 
package 'rpart.plot' was built under R version 3.6.3
# Classification tree: X is modelled on every other column of the training set.
census_model <- rpart(
  formula = X ~ .,
  data = censusTrain,
  method = "class"
)

Plot the decision tree

library(rpart)
library(rpart.plot)
package 'rpart.plot' was built under R version 3.6.3
# Draw the fitted decision tree.
rpart.plot(
  x = census_model,
  type = 5,
  extra = 0,
  tweak = 1.5
)

Predict the values on the test set

# Predict a class label for every row of the test set, then echo the result.
class_prediction <- predict(
  census_model,
  newdata = censusTest,
  type = "class"
)
print(class_prediction)
    2     4     5     8    11    17    19    20    23    26    32    34    35    38    41    47    49    50    53    56    62    64 
 >50K <=50K  >50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K 
   65    68    71    77    79    80    83    86    92    94    95    98   101   107   109   110   113   116   122   124   125   128 
<=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K  >50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K  >50K <=50K <=50K 
  131   137   139   140   143   146   152   154   155   158   161   167   169   170   173   176   182   184   185   188   191   197 
<=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K  >50K <=50K  >50K  >50K  >50K <=50K <=50K 
  199   200   203   206   212   214   215   218   221   227   229   230   233   236   242   244   245   248   251   257   259   260 
<=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K 
  263   266   272   274   275   278   281   287   289   290   293   296   302   304   305   308   311   317   319   320   323   326 
<=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K 
  332   334   335   338   341   347   349   350   353   356   362   364   365   368   371   377   379   380   383   386   392   394 
<=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K 
  395   398   401   407   409   410   413   416   422   424   425   428   431   437   439   440   443   446   452   454   455   458 
<=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K 
  461   467   469   470   473   476   482   484   485   488   491   497   499   500   503   506   512   514   515   518   521   527 
<=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K 
  529   530   533   536   542   544   545   548   551   557   559   560   563   566   572   574   575   578   581   587   589   590 
<=50K <=50K <=50K <=50K <=50K <=50K  >50K  >50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K 
  593   596   602   604   605   608   611   617   619   620   623   626   632   634   635   638   641   647   649   650   653   656 
<=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K 
  662   664   665   668   671   677   679   680   683   686   692   694   695   698   701   707   709   710   713   716   722   724 
<=50K <=50K <=50K  >50K <=50K <=50K <=50K  >50K  >50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K 
  725   728   731   737   739   740   743   746   752   754   755   758   761   767   769   770   773   776   782   784   785   788 
<=50K  >50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K 
  791   797   799   800   803   806   812   814   815   818   821   827   829   830   833   836   842   844   845   848   851   857 
<=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K 
  859   860   863   866   872   874   875   878   881   887   889   890   893   896   902   904   905   908   911   917   919   920 
<=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K 
  923   926   932   934   935   938   941   947   949   950   953   956   962   964   965   968   971   977   979   980   983   986 
<=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K  >50K  >50K <=50K <=50K <=50K <=50K <=50K 
  992   994   995   998  1001  1007  1009  1010  1013  1016  1022  1024  1025  1028  1031  1037  1039  1040  1043  1046  1052  1054 
<=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K  >50K  >50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K 
 1055  1058  1061  1067  1069  1070  1073  1076  1082  1084  1085  1088  1091  1097  1099  1100  1103  1106  1112  1114  1115  1118 
<=50K <=50K <=50K  >50K <=50K <=50K  >50K  >50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K 
 1121  1127  1129  1130  1133  1136  1142  1144  1145  1148  1151  1157  1159  1160  1163  1166  1172  1174  1175  1178  1181  1187 
<=50K <=50K  >50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K  >50K  >50K <=50K  >50K <=50K 
 1189  1190  1193  1196  1202  1204  1205  1208  1211  1217  1219  1220  1223  1226  1232  1234  1235  1238  1241  1247  1249  1250 
 >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K 
 1253  1256  1262  1264  1265  1268  1271  1277  1279  1280  1283  1286  1292  1294  1295  1298  1301  1307  1309  1310  1313  1316 
<=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K 
 1322  1324  1325  1328  1331  1337  1339  1340  1343  1346  1352  1354  1355  1358  1361  1367  1369  1370  1373  1376  1382  1384 
<=50K <=50K <=50K <=50K <=50K <=50K  >50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K 
 1385  1388  1391  1397  1399  1400  1403  1406  1412  1414  1415  1418  1421  1427  1429  1430  1433  1436  1442  1444  1445  1448 
<=50K <=50K  >50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K 
 1451  1457  1459  1460  1463  1466  1472  1474  1475  1478  1481  1487  1489  1490  1493  1496  1502  1504  1505  1508  1511  1517 
<=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K  >50K  >50K <=50K <=50K 
 1519  1520  1523  1526  1532  1534  1535  1538  1541  1547  1549  1550  1553  1556  1562  1564  1565  1568  1571  1577  1579  1580 
<=50K <=50K <=50K  >50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K  >50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K 
 1583  1586  1592  1594  1595  1598  1601  1607  1609  1610  1613  1616  1622  1624  1625  1628  1631  1637  1639  1640  1643  1646 
<=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K  >50K  >50K <=50K <=50K <=50K  >50K <=50K <=50K 
 1652  1654  1655  1658  1661  1667  1669  1670  1673  1676  1682  1684  1685  1688  1691  1697  1699  1700  1703  1706  1712  1714 
<=50K <=50K  >50K  >50K <=50K  >50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K 
 1715  1718  1721  1727  1729  1730  1733  1736  1742  1744  1745  1748  1751  1757  1759  1760  1763  1766  1772  1774  1775  1778 
<=50K <=50K <=50K  >50K <=50K  >50K <=50K <=50K  >50K <=50K  >50K <=50K <=50K <=50K <=50K  >50K  >50K <=50K <=50K <=50K <=50K <=50K 
 1781  1787  1789  1790  1793  1796  1802  1804  1805  1808  1811  1817  1819  1820  1823  1826  1832  1834  1835  1838  1841  1847 
 >50K <=50K  >50K <=50K <=50K  >50K <=50K <=50K <=50K  >50K <=50K  >50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K 
 1849  1850  1853  1856  1862  1864  1865  1868  1871  1877  1879  1880  1883  1886  1892  1894  1895  1898  1901  1907  1909  1910 
<=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K  >50K <=50K  >50K <=50K <=50K <=50K 
 1913  1916  1922  1924  1925  1928  1931  1937  1939  1940  1943  1946  1952  1954  1955  1958  1961  1967  1969  1970  1973  1976 
<=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K 
 1982  1984  1985  1988  1991  1997  1999  2000  2003  2006  2012  2014  2015  2018  2021  2027  2029  2030  2033  2036  2042  2044 
<=50K  >50K  >50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K 
 2045  2048  2051  2057  2059  2060  2063  2066  2072  2074  2075  2078  2081  2087  2089  2090  2093  2096  2102  2104  2105  2108 
<=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K 
 2111  2117  2119  2120  2123  2126  2132  2134  2135  2138  2141  2147  2149  2150  2153  2156  2162  2164  2165  2168  2171  2177 
<=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K  >50K  >50K <=50K <=50K  >50K <=50K <=50K  >50K <=50K <=50K <=50K  >50K <=50K <=50K 
 2179  2180  2183  2186  2192  2194  2195  2198  2201  2207  2209  2210  2213  2216  2222  2224  2225  2228  2231  2237  2239  2240 
<=50K <=50K <=50K <=50K <=50K  >50K  >50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K 
 2243  2246  2252  2254  2255  2258  2261  2267  2269  2270  2273  2276  2282  2284  2285  2288  2291  2297  2299  2300  2303  2306 
<=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K 
 2312  2314  2315  2318  2321  2327  2329  2330  2333  2336  2342  2344  2345  2348  2351  2357  2359  2360  2363  2366  2372  2374 
<=50K  >50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K 
 2375  2378  2381  2387  2389  2390  2393  2396  2402  2404  2405  2408  2411  2417  2419  2420  2423  2426  2432  2434  2435  2438 
 >50K  >50K  >50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K 
 2441  2447  2449  2450  2453  2456  2462  2464  2465  2468  2471  2477  2479  2480  2483  2486  2492  2494  2495  2498  2501  2507 
<=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K  >50K <=50K  >50K <=50K <=50K <=50K  >50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K 
 2509  2510  2513  2516  2522  2524  2525  2528  2531  2537  2539  2540  2543  2546  2552  2554  2555  2558  2561  2567  2569  2570 
<=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K 
 2573  2576  2582  2584  2585  2588  2591  2597  2599  2600  2603  2606  2612  2614  2615  2618  2621  2627  2629  2630  2633  2636 
<=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K 
 2642  2644  2645  2648  2651  2657  2659  2660  2663  2666  2672  2674  2675  2678  2681  2687  2689  2690  2693  2696  2702  2704 
<=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K 
 2705  2708  2711  2717  2719  2720  2723  2726  2732  2734  2735  2738  2741  2747  2749  2750  2753  2756  2762  2764  2765  2768 
<=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K 
 2771  2777  2779  2780  2783  2786  2792  2794  2795  2798  2801  2807  2809  2810  2813  2816  2822  2824  2825  2828  2831  2837 
 >50K  >50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K 
 2839  2840  2843  2846  2852  2854  2855  2858  2861  2867  2869  2870  2873  2876  2882  2884  2885  2888  2891  2897  2899  2900 
<=50K  >50K  >50K <=50K <=50K  >50K  >50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K  >50K  >50K <=50K 
 2903  2906  2912  2914  2915  2918  2921  2927  2929  2930  2933  2936  2942  2944  2945  2948  2951  2957  2959  2960  2963  2966 
<=50K <=50K <=50K <=50K <=50K  >50K <=50K  >50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K <=50K  >50K <=50K 
 2972  2974  2975  2978  2981  2987  2989  2990  2993  2996 
<=50K <=50K <=50K <=50K  >50K <=50K <=50K <=50K <=50K  >50K 
 [ reached getOption("max.print") -- omitted 9055 entries ]
Levels: <=50K >50K

# Confusion matrix layout: row 1 is TP FP, row 2 is FN TN. TP and TN are correctly predicted; FP and FN are wrongly predicted. ## Build a confusion matrix and calculate the accuracy

# Confusion matrix: tree predictions (rows) against the labels in censusTest$X
# (columns).
tab <- table(class_prediction, censusTest$X)
print(tab)
                
class_prediction <=50K >50K
           <=50K  7185 1256
           >50K    363 1251
# Overall accuracy: diagonal (correct) counts divided by the total count.
print(sum(diag(tab)) / sum(tab))
[1] 0.8389856

#Prediction: building a Random Forest ## Let’s build a random forest model ## Divide the dataset into training and test sets in an 80:20 ratio

# Fix the RNG seed so the random train/test split is reproducible.
set.seed(123)

# Install caTools only when it is missing: calling install.packages() on a
# package that is already loaded aborts with "Updating loaded packages".
if (!requireNamespace("caTools", quietly = TRUE)) {
  install.packages("caTools")
}
library(caTools)

# Split on the income label X with an 80:20 ratio. The result is used as a
# logical mask: TRUE rows go to the training set, FALSE rows to the test set.
split_data <- sample.split(censusData$X, SplitRatio = 0.8)
censusTrain <- subset(censusData, split_data)
censusTest <- subset(censusData, !split_data)
# Number of rows that ended up in the training subset.
nrow(censusTrain)
[1] 24129
# Number of rows that ended up in the test subset.
nrow(censusTest)
[1] 6033

Now let’s build a random forest model where the dependent variable is “X” (Yearly Income), the rest of the variables are the independent variables, and the number of trees is 300.

# Fit a random forest on the training set: X is the response, every other
# column is a predictor, and the forest is grown with 300 trees.
library(randomForest)

census_model <- randomForest(
  formula = X ~ .,
  data = censusTrain,
  ntree = 300
)

# Plot the fitted forest (for a randomForest object this draws the error
# rates against the number of trees).
plot(census_model)

# text() cannot be applied to a randomForest object (it fails in xy.coords
# because the model is a list without 'x' and 'y' components), so print the
# fitted model's summary instead.
print(census_model)

Predict values on the test set

# Predict the income class of every test-set row with the fitted forest.
# The variable name is kept as-is because the confusion-matrix step uses it.
cenus_prediction <- predict(
  census_model,
  newdata = censusTest,
  type = "class"
)

Build a confusion matrix and calculate the accuracy

# Cross-tabulate the random-forest predictions (rows) against the actual
# test-set labels (columns), then display the confusion matrix.
tab <- table(cenus_prediction, censusTest$X)
print(tab)
                
cenus_prediction <=50K >50K
           <=50K  4208  503
           >50K    323  999
# Accuracy: correctly classified cases (the diagonal) over all cases.
sum(diag(tab)) / sum(tab)
[1] 0.8630864
LS0tDQp0aXRsZTogIkNlbnN1cyBJbmNvbWUgUHJvamVjdCINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCg0KI0RhdGEgUHJlcHJvY2Vzc2luZzoNCiMjUmVwbGFjZSBhbGwgdGhlIG1pc3NpbmcgdmFsdWVzIHdpdGggTkEuDQpgYGB7cn0NCmxpYnJhcnkocGxvdGx5KQ0KYGBgDQojI0xldCdzIHRha2UgYSBsb29rIGF0IHRoZSBkYXRhIHN0cnVjdHVyZQ0KYGBge3J9DQpzdHIoY2Vuc3VzRGF0YSkNCmBgYA0KDQojIyBjb252ZXJ0IGNvbHVtbiBpbnRvIGNoYXJhY3Rlci4gQmVjYXVzZSB0aGUgZGF0YXNldCB3YXMgaW1wb3J0ZWQgYXMgRmFjdG9yLCB3ZSBuZWVkIHRvIGNoYW5nZSB0aGUgY29sdW1ucyBpbnRvIGNoYXJhY3RlcnMgc28gd2UgY2FuIG1hbmlwdWxhdGUgdGhlIGRhdGENCg0KYGBge3J9DQpjZW5zdXNEYXRhJHdvcmtjbGFzczwtYXMuY2hhcmFjdGVyKGNlbnN1c0RhdGEkd29ya2NsYXNzKQ0KIGNlbnN1c0RhdGEkb2NjdXBhdGlvbjwtYXMuY2hhcmFjdGVyKGNlbnN1c0RhdGEkb2NjdXBhdGlvbikNCiBjZW5zdXNEYXRhJG5hdGl2ZS5jb3VudHJ5PC1hcy5jaGFyYWN0ZXIoY2Vuc3VzRGF0YSRuYXRpdmUuY291bnRyeSkNCiBjZW5zdXNEYXRhJGVkdWNhdGlvbjwtYXMuY2hhcmFjdGVyKGNlbnN1c0RhdGEkZWR1Y2F0aW9uKQ0KIGNlbnN1c0RhdGEkbWFyaXRhbC5zdGF0dXM8LWFzLmNoYXJhY3RlcihjZW5zdXNEYXRhJG1hcml0YWwuc3RhdHVzKQ0KIGNlbnN1c0RhdGEkcmVsYXRpb25zaGlwPC1hcy5jaGFyYWN0ZXIoY2Vuc3VzRGF0YSRyZWxhdGlvbnNoaXApDQogY2Vuc3VzRGF0YSRyYWNlPC1hcy5jaGFyYWN0ZXIoY2Vuc3VzRGF0YSRyYWNlKQ0KIGNlbnN1c0RhdGEkc2V4PC1hcy5jaGFyYWN0ZXIoY2Vuc3VzRGF0YSRzZXgpDQogY2Vuc3VzRGF0YSRYPC1hcy5jaGFyYWN0ZXIoY2Vuc3VzRGF0YSRYKQ0KYGBgDQoNCiNOb3csIGxldCdzIGxvb2sgYXQgdGhlIG5ldyBzdHJ1Y3R1cmUNCmBgYHtyfQ0Kc3RyKGNlbnN1c0RhdGEpDQpgYGANCiMjTGV0IGxvb2sgYXQgdGhlIG1pc3NpbmcgdmFsdWUgaW4gdGhlIGNvbHVtbnMNCiAgIA0KDQpgYGB7cn0NCnRhYmxlKGlzLm5hKGNlbnN1c0RhdGEpKQ0KY2Vuc3VzRGF0YVtjZW5zdXNEYXRhPT0iID8iXTwtIE5BDQpgYGANCg0KIyNSZW1vdmUgYWxsIHRoZSByb3dzIHRoYXQgY29udGFpbiBOQSB2YWx1ZXMuDQpgYGB7cn0NCmNlbnN1c0RhdGEgPC1uYS5vbWl0KGNlbnN1c0RhdGEpDQpgYGANCg0KIyNSZW1vdmUgYWxsIHdoaXRlc3BhY2VzIGZyb20gdGhlIGNvbHVtbnMuDQpgYGB7cn0NCmluc3RhbGwucGFja2FnZXMoInN0cmluZ3IiKQ0KaW5zdGFsbC5wYWNrYWdlcygiZHBseXIiKQ0KbGlicmFyeShzdHJpbmdyKSANCmxpYnJhcnkoZHBseXIpDQpjZW5zdXNEYXRhPC1tdXRhdGVfaWYoY2Vuc3VzRGF0YSwgaXMuY2hhcmFjdGVyLCBzdHJfdHJpbSkNCmBgYA0KDQojI0luIG9yZGVyIHRvIHdvcmsgd2l0aCBtb2RlbHMsIHZpenVhbGl6ZSwgd2UgbmVlZCB0byBjb252ZXJ0IHRo
ZW0gaW50byBmYWN0b3JzDQpgYGB7cn0NCmNlbnN1c0RhdGEkd29ya2NsYXNzPC1hcy5mYWN0b3IoY2Vuc3VzRGF0YSR3b3JrY2xhc3MpDQpjZW5zdXNEYXRhJG9jY3VwYXRpb248LWFzLmZhY3RvcihjZW5zdXNEYXRhJG9jY3VwYXRpb24pDQpjZW5zdXNEYXRhJG5hdGl2ZS5jb3VudHJ5PC1hcy5mYWN0b3IoY2Vuc3VzRGF0YSRuYXRpdmUuY291bnRyeSkNCmNlbnN1c0RhdGEkZWR1Y2F0aW9uPC1hcy5mYWN0b3IoY2Vuc3VzRGF0YSRlZHVjYXRpb24pDQpjZW5zdXNEYXRhJG1hcml0YWwuc3RhdHVzPC1hcy5mYWN0b3IoY2Vuc3VzRGF0YSRtYXJpdGFsLnN0YXR1cykNCmNlbnN1c0RhdGEkcmVsYXRpb25zaGlwPC1hcy5mYWN0b3IoY2Vuc3VzRGF0YSRyZWxhdGlvbnNoaXApDQpjZW5zdXNEYXRhJHJhY2U8LWFzLmZhY3RvcihjZW5zdXNEYXRhJHJhY2UpDQpjZW5zdXNEYXRhJHNleDwtYXMuZmFjdG9yKGNlbnN1c0RhdGEkc2V4KQ0KY2Vuc3VzRGF0YSRYPC1hcy5mYWN0b3IoY2Vuc3VzRGF0YSRYKQ0KYGBgDQoNCg0KYGBge3J9DQpzdHIoY2Vuc3VzRGF0YSkNCmBgYA0KDQojbGV0J3Mgbm93IHBsb3QNCmBgYHtyfQ0KcGxvdChjZW5zdXNEYXRhJFgpDQpgYGANCg0KIyNEYXRhIE1hbmlwdWxhdGlvbjogSW4gdGhpcyBwaGFzZSBJIHdpbGwgcGVyZm9ybSBkYXRhIG1hbmlwdWxhdGlvbiB0byBhbmFseXplIHRoZSBkYXRhIHNldCB1c2luZyB2YXJpb3VzIGZ1bmN0aW9ucyBmcm9tIHRoZSBkcGx5ciBwYWNrYWdlDQpgYGB7cn0NCnN1bW1hcnkoY2Vuc3VzRGF0YSkNCmBgYA0KDQojI0V4dHJhY3QgdGhlICJlZHVjYXRpb24iIGNvbHVtbiBhbmQgc3RvcmUgaXQgaW4gImNlbnN1c19lZCINCmBgYHtyfQ0KY2Vuc3VzX2VkPC1jZW5zdXNEYXRhJGVkdWNhdGlvbg0KVmlldyhjZW5zdXNfZWQpDQpjbGFzcyhjZW5zdXNfZWQpDQpoZWFkKGNlbnN1c19lZCkNCmBgYA0KDQojI0V4dHJhY3QgYWxsIHRoZSBjb2x1bW5zIGZyb20gImFnZSIgdG8gInJlbGF0aW9uc2hpcCIgYW5kIHN0b3JlIGl0IGluICJjZW5zdXNfc2VxIi4NCmBgYHtyfQ0KaW5zdGFsbC5wYWNrYWdlcygiZHBseXIiKQ0KbGlicmFyeShkcGx5cikNCmNlbnN1c19zZXE8LXNlbGVjdChjZW5zdXNEYXRhLGFnZTpyZWxhdGlvbnNoaXApDQpjZW5zdXNfc2VxDQpgYGANCg0KIyNFeHRyYWN0IHRoZSBjb2x1bW4gbnVtYmVyICI1IiwgIjgiLCAiMTEiIGFuZCBzdG9yZSBpdCBpbiAiY2Vuc3VzX2NvbCINCmBgYHtyfQ0KY2Vuc3VzX2NvbDwtY2Vuc3VzRGF0YVssYyg1LDgsMTEpXQ0KVmlldyhjZW5zdXNfY29sKQ0KaGVhZChjZW5zdXNfY29sKQ0KYGBgDQoNCiMjRXh0cmFjdCBhbGwgdGhlIG1hbGUgZW1wbG95ZWVzIHdobyB3b3JrIGluIHN0YXRlLWdvdiBhbmQgc3RvcmUgaXQgaW4gIm1hbGVfZ292Ii4NCg0KYGBge3J9DQppbnN0YWxsLnBhY2thZ2VzKCJkcGx5ciIpDQpsaWJyYXJ5KGRwbHlyKQ0KbWFsZV9nb3Y8LWNlbnN1c0RhdGElPiUgZmlsdGVyKHNleCA9PSAiTWFsZSIgJiB3b3JrY2xhc3M9
PSJTdGF0ZS1nb3YiKQ0KVmlldyhtYWxlX2dvdikNCmBgYA0KDQojI0V4dHJhY3QgYWxsIHRoZSAzOSB5ZWFyIG9sZHMgd2hvIGVpdGhlciBoYXZlIGEgYmFjaGVsb3IncyBkZWdyZWUgDQojICAgIG9yIHdobyBhcmUgbmF0aXZlIG9mIFVuaXRlZCBTdGF0ZXMgYW5kIHN0b3JlIHRoZSByZXN1bHQgaW4gImNlbnN1c191cyINCmBgYHtyfQ0KdGFibGUoY2Vuc3VzRGF0YSRuYXRpdmUuY291bnRyeSkNCnRhYmxlKGNlbnN1c0RhdGEkZWR1Y2F0aW9uKQ0KY2Vuc3VzX3VzPC1jZW5zdXNEYXRhJT4lZmlsdGVyKGFnZT09MzkmKGVkdWNhdGlvbj09IkJhY2hlbG9ycyJ8bmF0aXZlLmNvdW50cnk9PSJVbml0ZWQtU3RhdGVzIikpDQpWaWV3KGNlbnN1c191cykNCmBgYA0KDQojI0V4dHJhY3QgMjAwIHJhbmRvbSByb3dzIGZyb20gdGhlICJjZW5zdXMiIGRhdGEgZnJhbWUgYW5kIHN0b3JlIGl0IGluICJjZW5zdXNfMjAwIi4NCg0KYGBge3J9DQpjZW5zdXNfMjAwPC1zYW1wbGVfbihjZW5zdXNEYXRhLDIwMCkNClZpZXcoY2Vuc3VzXzIwMCkNCmBgYA0KDQojI0dldCB0aGUgY291bnQgb2YgZGlmZmVyZW50IGxldmVscyBvZiB0aGUgIndvcmtjbGFzcyIgY29sdW1uLg0KDQpgYGB7cn0NCmluc3RhbGwucGFja2FnZXMoInBseXIiKQ0KbGlicmFyeShwbHlyKQ0KY291bnRXY2xzPC1jb3VudChjZW5zdXNEYXRhJHdvcmtjbGFzcykNCmNvdW50V2Nscw0KdGFibGUoY2Vuc3VzRGF0YSR3b3JrY2xhc3MpDQpgYGANCg0KIyNDYWxjdWxhdGUgdGhlIG1lYW4gb2YgImNhcGl0YWwuZ2FpbiIgY29sdW1uIGdyb3VwZWQgYWNjb3JkaW5nIHRvICJ3b3JrY2xhc3MiLg0KDQpgYGB7cn0NCnRhcHBseShjZW5zdXNEYXRhJGNhcGl0YWwuZ2FpbixjZW5zdXNEYXRhJHdvcmtjbGFzcyxtZWFuKQ0KYGBgDQoNCiNEYXRhIFZpc3VhbGl6YXRpb246DQpgYGB7cn0NCmluc3RhbGwucGFja2FnZXMoImdncGxvdDIiKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KYGBgDQoNCiMjQnVpbGQgYSBiYXItcGxvdCBmb3IgdGhlICJyZWxhdGlvbnNoaXAiIGNvbHVtbiBhbmQgZmlsbCB0aGUgYmFycyBhY2NvcmRpbmcgdG8gdGhlICJyYWNlIg0KIyBjb2x1bW4uDQpgYGB7cn0NCmdncGxvdChjZW5zdXNEYXRhLGFlcyh4PXJlbGF0aW9uc2hpcCxmaWxsPXJhY2UpKSsNCiAgZ2VvbV9iYXIoKQ0KYGBgDQoNCiMjU2V0IHgtYXhpcyBsYWJlbCB0byAnQ2F0ZWdvcmllcyBvZiBSZWxhdGlvbnNoaXBzJw0KIyNTZXQgeS1heGlzIGxhYmVsIHRvICdDb3VudCBvZiBDYXRlZ29yaWVzJw0KDQpgYGB7cn0NCmdncGxvdChjZW5zdXNEYXRhLGFlcyh4PXJlbGF0aW9uc2hpcCxmaWxsPXJhY2UpKSsNCiAgZ2VvbV9iYXIoKSsNCiAgbGFicyh4PSJDYXRlZ29yaWVzIG9mIFJlbGF0aW9uc2hpcHMiLHk9IkNvdW50IG9mIENhdGVnb3JpZXMiKQ0KYGBgDQoNCg0KIyNGaWxsIHRoZSBiYXJzIGFjY29yZGluZyB0byAic2V4Ig0KYGBge3J9DQpnZ3Bsb3QoY2Vuc3VzRGF0YSxhZXMoeD1yZWxhdGlvbnNoaXAsZmlsbD1zZXgp
KSsNCiAgZ2VvbV9iYXIoKSsNCiAgbGFicyh4PSJDYXRlZ29yaWVzIG9mIFJlbGF0aW9uc2hpcHMiLHk9IkNvdW50IG9mIENhdGVnb3JpZXMiKQ0KYGBgDQoNCiMjU2V0IHRoZSBwb3NpdGlvbiBvZiB0aGUgYmFycyB0byAiZG9kZ2UiDQoNCmBgYHtyfQ0KZ2dwbG90KGNlbnN1c0RhdGEsYWVzKHg9cmVsYXRpb25zaGlwLGZpbGw9c2V4KSkrDQogIGdlb21fYmFyKHBvc2l0aW9uID0gImRvZGdlIikrDQogIGxhYnMoeD0iQ2F0ZWdvcmllcyBvZiBSZWxhdGlvbnNoaXBzIix5PSJDb3VudCBvZiBDYXRlZ29yaWVzIikNCmBgYA0KDQojI1NldCB0aGUgdGl0bGUgb2YgcGxvdCB0byBiZSAnRGlzdHJpYnV0aW9uIG9mIFJlbGF0aW9uc2hpcHMgYnkgU2V4Ig0KDQpgYGB7cn0NCmdncGxvdChjZW5zdXNEYXRhLGFlcyh4PXJlbGF0aW9uc2hpcCxmaWxsPXNleCkpKw0KICBnZW9tX2Jhcihwb3NpdGlvbiA9ICJkb2RnZSIpKw0KICBsYWJzKHg9IkNhdGVnb3JpZXMgb2YgUmVsYXRpb25zaGlwcyIseT0iQ291bnQgb2YgQ2F0ZWdvcmllcyIsdGl0bGUgPSAiRGlzdHJpYnV0aW9uIG9mIFJlbGF0aW9uc2hpcHMgYnkgU2V4IikNCmBgYA0KDQojI0J1aWxkIGEgSGlzdG9ncmFtIGZvciB0aGUgImFnZSIgY29sdW1uIHdpdGggbnVtYmVyIG9mIGJpbnMgZXF1YWwgdG8gNTAuDQpgYGB7cn0NCmdncGxvdChjZW5zdXNEYXRhLGFlcyh4PWFnZSkpK2dlb21faGlzdG9ncmFtKGJpbnMgPSA1MCkNCnRhYmxlKGNlbnN1c0RhdGEkYWdlKQ0KYGBgDQoNCiMjRmlsbCB0aGUgYmFycyBvZiB0aGUgaGlzdG9ncmFtIGFjY29yZGluZyB0byB5ZWFybHkgaW5jb21lIGNvbHVtbiBpLmUuLCAiWCINCmBgYHtyfQ0KZ2dwbG90KGNlbnN1c0RhdGEsYWVzKHg9YWdlLGZpbGw9WCkpK2dlb21faGlzdG9ncmFtKGJpbnMgPSA5MCkNCmBgYA0KDQojI1NldCB0aGUgdGl0bGUgb2YgdGhlIHBsb3QgdG8gIkRpc3RyaWJ1dGlvbiBvZiBBZ2UiLg0KYGBge3J9DQpnZ3Bsb3QoY2Vuc3VzRGF0YSxhZXMoeD1hZ2UsZmlsbD1YKSkrZ2VvbV9oaXN0b2dyYW0oYmlucyA9IDkwKSsNCiAgbGFicyh0aXRsZSA9ICJEaXN0cmlidXRpb24gb2YgQWdlIikNCmBgYA0KDQojI1NldCB0aGUgbGVnZW5kIHRpdGxlIHRvICJZZWFybHkgaW5jb21lIi4NCmBgYHtyfQ0KZ2dwbG90KGNlbnN1c0RhdGEsYWVzKHg9YWdlLGZpbGw9WCkpK2dlb21faGlzdG9ncmFtKGJpbnMgPSA5MCkrDQogIGxhYnModGl0bGUgPSAiRGlzdHJpYnV0aW9uIG9mIEFnZSIsZmlsbD0nWWVhcmx5IGluY29tZScpDQpgYGANCg0KIyNTZXQgdGhlIHRoZW1lIG9mIHRoZSBwbG90IHRvIGJsYWNrIGFuZCB3aGl0ZS4NCg0KYGBge3J9DQpnZ3Bsb3QoY2Vuc3VzRGF0YSxhZXMoeD1hZ2UpKStnZW9tX2hpc3RvZ3JhbShiaW5zID05MCkrDQogIGxhYnModGl0bGUgPSAiRGlzdHJpYnV0aW9uIG9mIEFnZSIpK3RoZW1lKCkNCmBgYA0KDQojI0J1aWxkIGEgc2NhdHRlci1wbG90IGJldHdlZW4gImNhcGl0YWwuZ2FpbiIgYW5kICJob3Vycy5wZXIud2Vl
ayIuDQojIyAgICAgTWFwICJjYXBpdGFsLmdhaW4iIG9uIHRoZSB4LSBheGlzIGFuZCAiaG91cnMucGVyLndlZWsiIG9uIHRoZSB5LWF4aXMuDQoNCmBgYHtyfQ0KZ2dwbG90KGNlbnN1c0RhdGEsYWVzKHg9Y2FwaXRhbC5nYWluLHk9aG91cnMucGVyLndlZWspKStnZW9tX3BvaW50KCkNCmBgYA0KDQojI1NldCB0aGUgdHJhbnNwYXJlbmN5IG9mIHRoZSBwb2ludHMgdG8gNDAlIGFuZCBzaXplIGFzIDIuDQoNCmBgYHtyfQ0KZ2dwbG90KGNlbnN1c0RhdGEsYWVzKHg9Y2FwaXRhbC5nYWluLHk9aG91cnMucGVyLndlZWspKSsNCiAgZ2VvbV9wb2ludChhbHBoYT0wLjYsc2l6ZT0yKQ0KYGBgDQoNCiMjU2V0IHRoZSBjb2xvciBvZiB0aGUgcG9pbnRzIGFjY29yZGluZyB0byB0aGUgIlgiICh5ZWFybHkgaW5jb21lKSBjb2x1bW4uIA0KDQpgYGB7cn0NCmdncGxvdChjZW5zdXNEYXRhLGFlcyh4PWNhcGl0YWwuZ2Fpbix5PWhvdXJzLnBlci53ZWVrLGZpbGw9WCkpK2dlb21fcG9pbnQoKQ0KYGBgDQoNCiMjU2V0IHRoZSB4LWF4aXMgbGFiZWwgdG8gIkNhcGl0YWwgR2FpbiIsIHktYXhpcyBsYWJlbCB0byAiSG91cnMgcGVyIFdlZWsiLCB0aXRsZQ0KIyB0byAiQ2FwaXRhbCBHYWluIHZzIEhvdXJzIHBlciBXZWVrIGJ5IEluY29tZSIsIGFuZCBsZWdlbmQgbGFiZWwgdG8gIlllYXJseSBJbmNvbWUiLg0KDQpgYGB7cn0NCmdncGxvdChjZW5zdXNEYXRhLGFlcyh4PWNhcGl0YWwuZ2Fpbix5PWhvdXJzLnBlci53ZWVrLGZpbGw9WCkpKw0KICBnZW9tX3BvaW50KGFscGhhPTAuNixzaXplPTIpKw0KbGFicyh4PSJDYXBpdGFsIEdhaW4iLHk9IkhvdXJzIHBlciBXZWVrIiwNCiAgICAgdGl0bGUgPSAiQ2FwaXRhbCBHYWluIHZzIEhvdXJzIHBlciBXZWVrIGJ5IEluY29tZSIsIGZpbGw9IlllYXJseSBJbmNvbWUiKQ0KYGBgDQoNCg0KYGBge3J9DQppbnN0YWxsLnBhY2thZ2VzKCJwbG90bHkiKQ0KbGlicmFyeShwbG90bHkpDQpwbG90X2x5KGRhdGE9Y2Vuc3VzRGF0YSwgeCA9IH5jYXBpdGFsLmdhaW4sIHkgPSB+aG91cnMucGVyLndlZWssIGNvbG9yID0gflgsIHR5cGU9J3NjYXR0ZXInKQ0KYGBgDQoNCiMjQnVpbGQgYSBib3gtcGxvdCBiZXR3ZWVuICJlZHVjYXRpb24iIGFuZCAiYWdlIiBjb2x1bW4uTWFwICJlZHVjYXRpb24iIG9uIHRoZSB4LWF4aXMgYW5kDQojIyAiYWdlIiBvbiB0aGUgeS1heGlzLg0KDQogDQpgYGB7cn0NCmdncGxvdChjZW5zdXNEYXRhLGFlcyh4PWVkdWNhdGlvbix5PWFnZSkpK2dlb21fYm94cGxvdCgpDQpgYGANCg0KIyMgRmlsbCB0aGUgYm94LXBsb3RzIGFjY29yZGluZyB0byB0aGUgInNleCIgY29sdW1uLg0KIA0KYGBge3J9DQpnZ3Bsb3QoY2Vuc3VzRGF0YSxhZXMoeD1lZHVjYXRpb24seT1hZ2UsZmlsbD1zZXgpKStnZW9tX2JveHBsb3QoKQ0KYGBgDQoNCiMjIFNldCB0aGUgdGl0bGUgdG8gIkJveC1QbG90IG9mIGFnZSBieSBFZHVjYXRpb24gYW5kIFNleCIuDQogDQpgYGB7cn0NCmdncGxvdChjZW5zdXNEYXRhLGFlcyh4
PWVkdWNhdGlvbix5PWFnZSxmaWxsPXNleCkpKw0KICAgZ2VvbV9ib3hwbG90KCkrbGFicyh0aXRsZSA9ICJCb3gtUGxvdCBvZiBhZ2UgYnkgRWR1Y2F0aW9uIGFuZCBTZXgiKQ0KYGBgDQoNCiNQcmVkaWN0aW9uIGd1aWxkaW5nIGEgTGluZWFyIFJlZ3Jlc3Npb24gTW9kZWw6DQojIyBCdWlsZCBhIHNpbXBsZSBsaW5lYXIgcmVncmVzc2lvbiBtb2RlbA0KIyNEaXZpZGUgdGhlIGRhdGFzZXQgaW50byB0cmFpbmluZyBhbmQgdGVzdCBzZXRzIGluIDcwOjMwIHJhdGlvLg0KYGBge3J9DQogc2V0LnNlZWQoOTgpDQogaW5zdGFsbC5wYWNrYWdlcygiY2FUb29scyIpDQogbGlicmFyeSgiY2FUb29scyIpDQogc3BsaXRfZGF0YTwtc2FtcGxlLnNwbGl0KGNlbnN1c0RhdGEkaG91cnMucGVyLndlZWssU3BsaXRSYXRpbyA9IDAuNzApDQogVmlldyhzcGxpdF9kYXRhKQ0KIGNlbnN1c1RyYWluPC1zdWJzZXQoY2Vuc3VzRGF0YSxzcGxpdF9kYXRhPT1UKQ0KIGNlbnN1c1Rlc3Q8LXN1YnNldChjZW5zdXNEYXRhLHNwbGl0X2RhdGE9PUYpDQogVmlldyhjZW5zdXNUcmFpbikNCiBWaWV3KGNlbnN1c1Rlc3QpDQogbnJvdyhjZW5zdXNUcmFpbikNCiBucm93KGNlbnN1c1Rlc3QpDQpWaWV3KHNwbGl0X2RhdGEpDQpgYGANCg0KIyMgQnVpbGQgYSBsaW5lYXIgbW9kZWwgb24gdGhlIHRyYWluIHNldCB3aGVyZSB0aGUgZGVwZW5kZW50IHZhcmlhYmxlIGlzDQojIyJob3Vycy5wZXIud2VlayIgYW5kIGluZGVwZW5kZW50IHZhcmlhYmxlIGlzICJlZHVjYXRpb24ubnVtIi4gDQojIyBkZXBlbmRlbnR+aW5kZXBlbmQNCiANCmBgYHtyfQ0KVmlldyhjZW5zdXNEYXRhW2MoJ2hvdXJzLnBlci53ZWVrJywnZWR1Y2F0aW9uLm51bScpXSkNCiANCiBMUl9tb2RlbDwtbG0oaG91cnMucGVyLndlZWt+ZWR1Y2F0aW9uLm51bSxkYXRhPWNlbnN1c1RyYWluKQ0KIHN1bW1hcnkoTFJfbW9kZWwpDQpgYGANCiMgUHJlZGljdGluZyB0aGUgdmFsdWVzIG9uIHRoZSB0ZXN0IHNldCBhbmQgZmluZCB0aGUgZXJyb3IgaW4gcHJlZGljdGlvbi4gDQojI0ZpbmQgdGhlIHJvb3QtbWVhbi1zcXVhcmUgZXJyb3IgKFJNU0UpLg0KDQoNCmBgYHtyfQ0KIGNlbnN1c1A8LXByZWRpY3QoTFJfbW9kZWwsbmV3ZGF0YT1jZW5zdXNUZXN0KQ0KIGhlYWQoY2Vuc3VzUCkNCiBWaWV3KGNlbnN1c1ApDQogY2Vuc3VzRDwtY2JpbmQoQWN0dWFsPWNlbnN1c1Rlc3QkaG91cnMucGVyLndlZWssUHJlZGljdGVkPWNlbnN1c1ApDQogVmlldyhjZW5zdXNEKQ0KIGNsYXNzKGNlbnN1c0QpDQogY2Vuc3VzRDwtYXMuZGF0YS5mcmFtZShjZW5zdXNEKQ0KIEVycm9yPC1jZW5zdXNEJEFjdHVhbC1jZW5zdXNEJFByZWRpY3RlZA0KIFZpZXcoRXJyb3IpDQpEYXRhPC1jYmluZChjZW5zdXNELEVycm9yKQ0KIFZpZXcoRGF0YSkNCmBgYA0KDQoNCmBgYHtyfQ0Kc3FydChtZWFuKChEYXRhJEVycm9yKV4yKSkNCmxpYnJhcnkoY2FyZXQpDQpSTVNFKGNlbnN1c1AsIGNlbnN1c1Rlc3QkaG91cnMucGVyLndlZWspDQpgYGAN
Cg0KI1ByZWRpY3Rpb24gYnVpbGRpbmcgYSAJTG9naXN0aWMgUmVncmVzc2lvbg0KDQojIyBsZXQncyBkaXZpZGUgdGhlIGRhdGFzZXQgaW50byB0cmFpbmluZyBhbmQgdGVzdCBzZXRzIGluIDY1OjM1IHJhdGlvLg0KIA0KYGBge3J9DQppbnN0YWxsLnBhY2thZ2VzKCJjYVRvb2xzIikNCiBsaWJyYXJ5KCJjYVRvb2xzIikNCiBzcGxpdF9kYXRhMTwtc2FtcGxlLnNwbGl0KGNlbnN1c0RhdGEkWCxTcGxpdFJhdGlvID0gMC42NSkNCiBjZW5zdXNUcmFpbjE8LXN1YnNldChjZW5zdXNEYXRhLHNwbGl0X2RhdGExPT1UKQ0KIGNlbnN1c1Rlc3QxPC1zdWJzZXQoY2Vuc3VzRGF0YSxzcGxpdF9kYXRhMT09RikNCiBucm93KGNlbnN1c1RyYWluMSkNCiBucm93KGNlbnN1c1Rlc3QxKQ0KYGBgDQoNCiMjIyBub3cgbGV0J3MgYnVpbGQgYSBsb2dpc3RpYyByZWdyZXNzaW9uIG1vZGVsIHdoZXJlIHRoZSBkZXBlbmRlbnQgdmFyaWFibGUgaXMgIlgiKHllYXJseSBpbmNvbWUpIGFuZCBpbmRlcGVuZGVudCB2YXJpYWJsZSBpcyAib2NjdXBhdGlvbiIuDQoNCmBgYHtyfQ0KbG9nX21vZDwtZ2xtKFh+b2NjdXBhdGlvbixkYXRhPWNlbnN1c1RyYWluMSxmYW1pbHkgPSAiYmlub21pYWwiKQ0Kc3VtbWFyeShsb2dfbW9kKQ0KYGBgDQoNCiMjIFByZWRpY3RpbmcgdGhlIHZhbHVlcyBvbiB0aGUgdGVzdCBzZXQuIyMjIyBUTyBkZWNpZGUgQWNjdXJhY3kgd2UgbmVlZCB0byBpbnN0YWxsIFJPQ1IuIHBsb3QoYWNjKSMjIENoZWNrIGZvciB3aGljaCB2YWx2ZSBhY2N1cmFjeSBnZXQgY29uc3RhbnQNCmBgYHtyfQ0KcHJlZF92YWw8LXByZWRpY3QobG9nX21vZCxuZXdkYXRhID1jZW5zdXNUZXN0MSx0eXBlID0gInJlc3BvbnNlIikjcHJvYmFiaWxpdHkNCmhlYWQocHJlZF92YWwpDQpyYW5nZShwcmVkX3ZhbCkNCmluc3RhbGwucGFja2FnZXMoIlJPQ1IiKQ0KbGlicmFyeShST0NSKQ0KcHJlZGljdF9sb2dfcm9jPC1wcmVkaWN0aW9uKHByZWRfdmFsLGNlbnN1c1Rlc3QxJFgpDQpwcmVkaWN0X2xvZ19yb2MNCmFjYzwtcGVyZm9ybWFuY2UocHJlZGljdF9sb2dfcm9jLCJhY2MiKQ0KcGxvdChhY2MpDQp0YWJsZShjZW5zdXNEYXRhJFgpDQpgYGANCiMjIFBsb3QgYWNjdXJhY3kgdnMgY3V0LW9mZiBhbmQgcGljayBhbiBpZGVhbCB2YWx1ZSBmb3IgY3V0LW9mZi4NCg0KYGBge3J9DQpsbS5wcmVkPC1pZmVsc2UocHJlZF92YWw+MC40NywiPjUwSyIsIjw9NTBLIikgIA0KbG0ucHJlZA0KDQpgYGANCg0KDQojIEJ1aWxkIGEgY29uZnVzaW9uIG1hdHJpeCBhbmQgZmluZCB0aGUgYWNjdXJhY3kuDQoNCmBgYHtyfQ0KdGFiPC10YWJsZShsbS5wcmVkLGNlbnN1c1Rlc3QxJFgpDQp0YWINCmBgYA0KDQojVFAgRlANCiNGTiBUTg0KI1RQIFROIC1jb3JyZWN0bHkgcHJlZGljdGVkDQojRlAgRk4gLSB3cm9uZ2x5IHByZWRpY3RlZA0KDQoNCmBgYHtyfQ0KKDcxODgrNjYwKS8oNzE4OCs2NjArMTk2OCs3NDEpDQphY2N1cmFjeTwtc3VtKGRpYWcodGFiKSkvc3VtKHRhYikNCmFjY3Vy
YWN5DQpgYGANCg0KIyBQbG90IHRoZSBST0MgY3VydmUgYW5kIGZpbmQgdGhlIGF1YyhBcmVhIFVuZGVyIEN1cnZlKS4gDQoNCmBgYHtyfQ0KaW5zdGFsbC5wYWNrYWdlcygiY2FUb29scyIpDQogbGlicmFyeSgiY2FUb29scyIpDQpyb2M8LXBlcmZvcm1hbmNlKHByZWRpY3RfbG9nX3JvYywidHByIiwiZnByIikNCnBsb3Qocm9jKQ0KcGVyZm9ybWFuY2UocHJlZGljdF9sb2dfcm9jLCAiYXVjIiktPmF1Yw0KYXVjDQphdWM8LWF1Y0B5LnZhbHVlc1tbMV1dDQphdWMNCnNwbGl0X2RhdGExPC0gc2FtcGxlLnNwbGl0KGNlbnN1c0RhdGEkWCxTcGxpdFJhdGlvID0gMC44MCkNCmNlbnN1c1RyYWluMjwtc3Vic2V0KGNlbnN1c0RhdGEsc3BsaXRfZGF0YTE9PVQpDQpjZW5zdXNUZXN0Mjwtc3Vic2V0KGNlbnN1c0RhdGEsc3BsaXRfZGF0YTE9PUYpDQoNCmxvZ19tb2QyPC1nbG0oWH5hZ2Urd29ya2NsYXNzK2VkdWNhdGlvbixkYXRhPWNlbnN1c1RyYWluMixmYW1pbHkgPSAiYmlub21pYWwiKQ0Kc3VtbWFyeShsb2dfbW9kMikNCnByZWRfdmFsPC1wcmVkaWN0KGxvZ19tb2QyLG5ld2RhdGEgPWNlbnN1c1Rlc3QyLHR5cGUgPSAicmVzcG9uc2UiKQ0KaGVhZChwcmVkX3ZhbCkNCmBgYA0KDQojI2xpYnJhcnkoUk9DUikgIyMgVE8gZGVjaWRlIEFjY3VyYWN5DQoNCmBgYHtyfQ0KbGlicmFyeShST0NSKQ0KcHJlZGljdF9sb2dfcm9jPC1wcmVkaWN0aW9uKHByZWRfdmFsLGNlbnN1c1Rlc3QyJFgpDQpwcmVkaWN0X2xvZ19yb2MNCmFjYzwtcGVyZm9ybWFuY2UocHJlZGljdF9sb2dfcm9jLCJhY2MiKQ0KcGxvdChhY2MpDQpsbS5wcmVkPC1pZmVsc2UocHJlZF92YWw+MC40NSwiPjUwSyIsIjw9NTBLIikgIA0KbG0ucHJlZA0KDQp0YWI8LXRhYmxlKGxtLnByZWQsY2Vuc3VzVGVzdDIkWCkNCnRhYg0KYWNjdXJhY3k8LXN1bShkaWFnKHRhYikpL3N1bSh0YWIpDQphY2N1cmFjeQ0KDQpyb2M8LXBlcmZvcm1hbmNlKHByZWRpY3RfbG9nX3JvYywidHByIiwiZnByIikNCnBsb3Qocm9jKQ0KcGVyZm9ybWFuY2UocHJlZGljdF9sb2dfcm9jLCAiYXVjIiktPmF1Yw0KYXVjDQphdWM8LWF1Y0B5LnZhbHVlc1tbMV1dDQphdWMNCmBgYA0KI1ByZWRpY3Rpb24gYnVpbGRpbmcgYSAgRGVjaXNpb24gVHJlZSBNb2RlbDoNCiMjIERpdmlkZSB0aGUgZGF0YXNldCBpbnRvIHRyYWluaW5nIGFuZCB0ZXN0IHNldHMgaW4gNzA6MzAgcmF0aW8uDQpgYGB7cn0NCnNldC5zZWVkKDEyMykNCmluc3RhbGwucGFja2FnZXMoImNhVG9vbHMiKQ0KIGxpYnJhcnkoImNhVG9vbHMiKQ0Kc3BsaXRfZGF0YTwtc2FtcGxlLnNwbGl0KGNlbnN1c0RhdGEsU3BsaXRSYXRpbyA9IDAuNzApDQpjZW5zdXNUcmFpbjwtc3Vic2V0KGNlbnN1c0RhdGEsc3BsaXRfZGF0YT09VCkNCmNlbnN1c1Rlc3Q8LXN1YnNldChjZW5zdXNEYXRhLHNwbGl0X2RhdGE9PUYpDQpucm93KGNlbnN1c1RyYWluKQ0KbnJvdyhjZW5zdXNUZXN0KQ0KIyBCdWlsZCBhIGRlY2lzaW9uIHRyZWUgbW9kZWwgd2hlcmUgdGhlIGRl
cGVuZGVudCB2YXJpYWJsZSBpcyAiWCIoWWVhcmx5IEluY29tZSkgYW5kIHRoZSByZXN0IG9mIHRoZSB2YXJpYWJsZXMgYXMgaW5kZXBlbmRlbnQgdmFyaWFibGVzDQpsaWJyYXJ5KHJwYXJ0KQ0KbGlicmFyeShycGFydC5wbG90KSANCg0KY2Vuc3VzX21vZGVsPC1ycGFydChmb3JtdWxhID0gWH4uLA0KICAgICAgICAgICAgICAgICAgICBkYXRhID0gY2Vuc3VzVHJhaW4sDQogICAgICAgICAgICAgICAgICAgIG1ldGhvZCA9ICJjbGFzcyIpDQpgYGANCg0KIyMgUGxvdCB0aGUgZGVjaXNpb24gdHJlZSANCg0KYGBge3J9DQpsaWJyYXJ5KHJwYXJ0KQ0KbGlicmFyeShycGFydC5wbG90KQ0KcnBhcnQucGxvdCh4PSBjZW5zdXNfbW9kZWwsIHR5cGU9IDUsIGV4dHJhID0gMCx0d2VhayA9IDEuNSkNCmBgYA0KDQojIyBQcmVkaWN0IHRoZSB2YWx1ZXMgb24gdGhlIHRlc3Qgc2V0DQpgYGB7cn0NCmNsYXNzX3ByZWRpY3Rpb248LXByZWRpY3QoY2Vuc3VzX21vZGVsLA0KICAgICAgICAgICAgICAgICAgICAgICAgICBuZXdkYXRhID0gY2Vuc3VzVGVzdCwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgdHlwZSA9ICJjbGFzcyIpDQpjbGFzc19wcmVkaWN0aW9uDQpgYGANCg0KI1RQIEZQDQojRk4gVE4NCiNUUCBUTiAtY29ycmVjdGx5IHByZWRpY3RlZA0KI0ZQIEZOIC0gd3JvbmdseSBwcmVkaWN0ZWQNCiMgQnVpbGQgYSBjb25mdXNpb24gbWF0cml4IGFuZCBjYWxjdWxhdGUgdGhlIGFjY3VyYWN5DQpgYGB7cn0NCnRhYjwtdGFibGUoY2xhc3NfcHJlZGljdGlvbixjZW5zdXNUZXN0JFgpDQp0YWINCnN1bShkaWFnKHRhYikpL3N1bSh0YWIpDQpgYGANCiNQcmVkaWN0aW9uIGJ1aWxkaW5nIGEgcmFuZG9tIEZvcmVzdDoNCiMjIGxldCdzIGJ1aWxkIGEgcmFuZG9tIGZvcmVzdCBtb2RlbA0KIyMgRGl2aWRlIHRoZSBkYXRhc2V0IGludG8gdHJhaW5pbmcgYW5kIHRlc3Qgc2V0cyBpbiA4MDoyMCByYXRpbw0KDQpgYGB7cn0NCnNldC5zZWVkKDEyMykNCmluc3RhbGwucGFja2FnZXMoImNhVG9vbHMiKQ0KIGxpYnJhcnkoImNhVG9vbHMiKQ0Kc3BsaXRfZGF0YTwtc2FtcGxlLnNwbGl0KGNlbnN1c0RhdGEkWCxTcGxpdFJhdGlvID0gMC44KQ0KY2Vuc3VzVHJhaW48LXN1YnNldChjZW5zdXNEYXRhLHNwbGl0X2RhdGE9PVQpDQpjZW5zdXNUZXN0PC1zdWJzZXQoY2Vuc3VzRGF0YSxzcGxpdF9kYXRhPT1GKQ0KbnJvdyhjZW5zdXNUcmFpbikNCm5yb3coY2Vuc3VzVGVzdCkNCmBgYA0KIyMgbm93IGxldCdzIGJ1aWxkIGEgcmFuZG9tIGZvcmVzdCBtb2RlbCB3aGVyZSB0aGUgZGVwZW5kZW50IHZhcmlhYmxlIGlzICJYIihZZWFybHkgSW5jb21lKSBhbmQgdGhlIHJlc3Qgb2YgdGhlIHZhcmlhYmxlcyBhcyBpbmRlcGVuZGVudCB2YXJpYWJsZXMgYW5kIG51bWJlciBvZiB0cmVlcyBhcyAzMDAuDQoNCg0KYGBge3J9DQpsaWJyYXJ5KHJhbmRvbUZvcmVzdCkNCg0KY2Vuc3VzX21vZGVsPC1yYW5kb21Gb3Jlc3QoZm9ybXVsYT1Yfi4sDQogICAgICAgICAgICAgICAg
ICAgICAgICAgICBkYXRhPWNlbnN1c1RyYWluLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgbnRyZWU9MzAwKQ0KDQpwbG90KGNlbnN1c19tb2RlbCkNCmBgYA0KDQoNCmBgYHtyfQ0KdGV4dChjZW5zdXNfbW9kZWwpDQpgYGANCiMjIFByZWRpY3QgdmFsdWVzIG9uIHRoZSB0ZXN0IHNldA0KDQpgYGB7cn0NCmNlbnVzX3ByZWRpY3Rpb248LXByZWRpY3QoY2Vuc3VzX21vZGVsLA0KICAgICAgICAgICAgICAgICAgICAgICAgICBuZXdkYXRhID0gY2Vuc3VzVGVzdCwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgdHlwZSA9ICJjbGFzcyIpDQpgYGANCg0KIyBCdWlsZCBhIGNvbmZ1c2lvbiBtYXRyaXggYW5kIGNhbGN1bGF0ZSB0aGUgYWNjdXJhY3kNCmBgYHtyfQ0KdGFiPC10YWJsZShjZW51c19wcmVkaWN0aW9uLGNlbnN1c1Rlc3QkWCkNCnRhYg0Kc3VtKGRpYWcodGFiKSkvc3VtKHRhYikNCmBgYA==